1 /* 2 * IP multicast routing support for mrouted 3.6/3.8 3 * 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 5 * Linux Consultancy and Custom Driver Development 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Fixes: 13 * Michael Chastain : Incorrect size of copying. 14 * Alan Cox : Added the cache manager code 15 * Alan Cox : Fixed the clone/copy bug and device race. 16 * Mike McLagan : Routing by source 17 * Malcolm Beattie : Buffer handling fixes. 18 * Alexey Kuznetsov : Double buffer free and other fixes. 19 * SVR Anand : Fixed several multicast bugs and problems. 20 * Alexey Kuznetsov : Status, optimisations and more. 21 * Brad Parker : Better behaviour on mrouted upcall 22 * overflow. 23 * Carlos Picoto : PIMv1 Support 24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 25 * Relax this requirement to work with older peers. 
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>

#include <linux/nospec.h>

/* Policy rule of the IPMR family. It carries nothing beyond the generic
 * fib_rule part; its size is what the rules ops template reports as
 * rule_size.
 */
struct ipmr_rule {
	struct fib_rule common;
};

/* Result slot handed to the rule lookup: the multicast routing table
 * picked by the matching rule (filled in by ipmr_rule_action()).
 */
struct ipmr_result {
	struct mr_table *mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
98 */ 99 100 static struct kmem_cache *mrt_cachep __ro_after_init; 101 102 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 103 static void ipmr_free_table(struct mr_table *mrt); 104 105 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 106 struct net_device *dev, struct sk_buff *skb, 107 struct mfc_cache *cache, int local); 108 static int ipmr_cache_report(struct mr_table *mrt, 109 struct sk_buff *pkt, vifi_t vifi, int assert); 110 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 111 int cmd); 112 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 113 static void mroute_clean_tables(struct mr_table *mrt, int flags); 114 static void ipmr_expire_process(struct timer_list *t); 115 116 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 117 #define ipmr_for_each_table(mrt, net) \ 118 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 119 120 static struct mr_table *ipmr_mr_table_iter(struct net *net, 121 struct mr_table *mrt) 122 { 123 struct mr_table *ret; 124 125 if (!mrt) 126 ret = list_entry_rcu(net->ipv4.mr_tables.next, 127 struct mr_table, list); 128 else 129 ret = list_entry_rcu(mrt->list.next, 130 struct mr_table, list); 131 132 if (&ret->list == &net->ipv4.mr_tables) 133 return NULL; 134 return ret; 135 } 136 137 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 138 { 139 struct mr_table *mrt; 140 141 ipmr_for_each_table(mrt, net) { 142 if (mrt->id == id) 143 return mrt; 144 } 145 return NULL; 146 } 147 148 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 149 struct mr_table **mrt) 150 { 151 int err; 152 struct ipmr_result res; 153 struct fib_lookup_arg arg = { 154 .result = &res, 155 .flags = FIB_LOOKUP_NOREF, 156 }; 157 158 /* update flow if oif or iif point to device enslaved to l3mdev */ 159 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 160 161 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 162 flowi4_to_flowi(flp4), 0, &arg); 163 if (err < 0) 
164 return err; 165 *mrt = res.mrt; 166 return 0; 167 } 168 169 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 170 int flags, struct fib_lookup_arg *arg) 171 { 172 struct ipmr_result *res = arg->result; 173 struct mr_table *mrt; 174 175 switch (rule->action) { 176 case FR_ACT_TO_TBL: 177 break; 178 case FR_ACT_UNREACHABLE: 179 return -ENETUNREACH; 180 case FR_ACT_PROHIBIT: 181 return -EACCES; 182 case FR_ACT_BLACKHOLE: 183 default: 184 return -EINVAL; 185 } 186 187 arg->table = fib_rule_get_table(rule, arg); 188 189 mrt = ipmr_get_table(rule->fr_net, arg->table); 190 if (!mrt) 191 return -EAGAIN; 192 res->mrt = mrt; 193 return 0; 194 } 195 196 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 197 { 198 return 1; 199 } 200 201 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 202 FRA_GENERIC_POLICY, 203 }; 204 205 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 206 struct fib_rule_hdr *frh, struct nlattr **tb, 207 struct netlink_ext_ack *extack) 208 { 209 return 0; 210 } 211 212 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 213 struct nlattr **tb) 214 { 215 return 1; 216 } 217 218 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 219 struct fib_rule_hdr *frh) 220 { 221 frh->dst_len = 0; 222 frh->src_len = 0; 223 frh->tos = 0; 224 return 0; 225 } 226 227 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 228 .family = RTNL_FAMILY_IPMR, 229 .rule_size = sizeof(struct ipmr_rule), 230 .addr_size = sizeof(u32), 231 .action = ipmr_rule_action, 232 .match = ipmr_rule_match, 233 .configure = ipmr_rule_configure, 234 .compare = ipmr_rule_compare, 235 .fill = ipmr_rule_fill, 236 .nlgroup = RTNLGRP_IPV4_RULE, 237 .policy = ipmr_rule_policy, 238 .owner = THIS_MODULE, 239 }; 240 241 static int __net_init ipmr_rules_init(struct net *net) 242 { 243 struct fib_rules_ops *ops; 244 struct mr_table *mrt; 245 int 
err; 246 247 ops = fib_rules_register(&ipmr_rules_ops_template, net); 248 if (IS_ERR(ops)) 249 return PTR_ERR(ops); 250 251 INIT_LIST_HEAD(&net->ipv4.mr_tables); 252 253 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 254 if (IS_ERR(mrt)) { 255 err = PTR_ERR(mrt); 256 goto err1; 257 } 258 259 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 260 if (err < 0) 261 goto err2; 262 263 net->ipv4.mr_rules_ops = ops; 264 return 0; 265 266 err2: 267 ipmr_free_table(mrt); 268 err1: 269 fib_rules_unregister(ops); 270 return err; 271 } 272 273 static void __net_exit ipmr_rules_exit(struct net *net) 274 { 275 struct mr_table *mrt, *next; 276 277 rtnl_lock(); 278 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 279 list_del(&mrt->list); 280 ipmr_free_table(mrt); 281 } 282 fib_rules_unregister(net->ipv4.mr_rules_ops); 283 rtnl_unlock(); 284 } 285 286 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 287 { 288 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); 289 } 290 291 static unsigned int ipmr_rules_seq_read(struct net *net) 292 { 293 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 294 } 295 296 bool ipmr_rule_default(const struct fib_rule *rule) 297 { 298 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 299 } 300 EXPORT_SYMBOL(ipmr_rule_default); 301 #else 302 #define ipmr_for_each_table(mrt, net) \ 303 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 304 305 static struct mr_table *ipmr_mr_table_iter(struct net *net, 306 struct mr_table *mrt) 307 { 308 if (!mrt) 309 return net->ipv4.mrt; 310 return NULL; 311 } 312 313 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 314 { 315 return net->ipv4.mrt; 316 } 317 318 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 319 struct mr_table **mrt) 320 { 321 *mrt = net->ipv4.mrt; 322 return 0; 323 } 324 325 static int __net_init ipmr_rules_init(struct net *net) 326 { 327 struct mr_table *mrt; 328 329 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 
330 if (IS_ERR(mrt)) 331 return PTR_ERR(mrt); 332 net->ipv4.mrt = mrt; 333 return 0; 334 } 335 336 static void __net_exit ipmr_rules_exit(struct net *net) 337 { 338 rtnl_lock(); 339 ipmr_free_table(net->ipv4.mrt); 340 net->ipv4.mrt = NULL; 341 rtnl_unlock(); 342 } 343 344 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 345 { 346 return 0; 347 } 348 349 static unsigned int ipmr_rules_seq_read(struct net *net) 350 { 351 return 0; 352 } 353 354 bool ipmr_rule_default(const struct fib_rule *rule) 355 { 356 return true; 357 } 358 EXPORT_SYMBOL(ipmr_rule_default); 359 #endif 360 361 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 362 const void *ptr) 363 { 364 const struct mfc_cache_cmp_arg *cmparg = arg->key; 365 struct mfc_cache *c = (struct mfc_cache *)ptr; 366 367 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 368 cmparg->mfc_origin != c->mfc_origin; 369 } 370 371 static const struct rhashtable_params ipmr_rht_params = { 372 .head_offset = offsetof(struct mr_mfc, mnode), 373 .key_offset = offsetof(struct mfc_cache, cmparg), 374 .key_len = sizeof(struct mfc_cache_cmp_arg), 375 .nelem_hint = 3, 376 .locks_mul = 1, 377 .obj_cmpfn = ipmr_hash_cmp, 378 .automatic_shrinking = true, 379 }; 380 381 static void ipmr_new_table_set(struct mr_table *mrt, 382 struct net *net) 383 { 384 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 385 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 386 #endif 387 } 388 389 static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { 390 .mfc_mcastgrp = htonl(INADDR_ANY), 391 .mfc_origin = htonl(INADDR_ANY), 392 }; 393 394 static struct mr_table_ops ipmr_mr_table_ops = { 395 .rht_params = &ipmr_rht_params, 396 .cmparg_any = &ipmr_mr_table_ops_cmparg_any, 397 }; 398 399 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 400 { 401 struct mr_table *mrt; 402 403 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 404 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 405 return 
ERR_PTR(-EINVAL); 406 407 mrt = ipmr_get_table(net, id); 408 if (mrt) 409 return mrt; 410 411 return mr_table_alloc(net, id, &ipmr_mr_table_ops, 412 ipmr_expire_process, ipmr_new_table_set); 413 } 414 415 static void ipmr_free_table(struct mr_table *mrt) 416 { 417 del_timer_sync(&mrt->ipmr_expire_timer); 418 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | 419 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); 420 rhltable_destroy(&mrt->mfc_hash); 421 kfree(mrt); 422 } 423 424 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 425 426 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 427 { 428 struct net *net = dev_net(dev); 429 430 dev_close(dev); 431 432 dev = __dev_get_by_name(net, "tunl0"); 433 if (dev) { 434 const struct net_device_ops *ops = dev->netdev_ops; 435 struct ifreq ifr; 436 struct ip_tunnel_parm p; 437 438 memset(&p, 0, sizeof(p)); 439 p.iph.daddr = v->vifc_rmt_addr.s_addr; 440 p.iph.saddr = v->vifc_lcl_addr.s_addr; 441 p.iph.version = 4; 442 p.iph.ihl = 5; 443 p.iph.protocol = IPPROTO_IPIP; 444 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 445 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 446 447 if (ops->ndo_do_ioctl) { 448 mm_segment_t oldfs = get_fs(); 449 450 set_fs(KERNEL_DS); 451 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 452 set_fs(oldfs); 453 } 454 } 455 } 456 457 /* Initialize ipmr pimreg/tunnel in_device */ 458 static bool ipmr_init_vif_indev(const struct net_device *dev) 459 { 460 struct in_device *in_dev; 461 462 ASSERT_RTNL(); 463 464 in_dev = __in_dev_get_rtnl(dev); 465 if (!in_dev) 466 return false; 467 ipv4_devconf_setall(in_dev); 468 neigh_parms_data_state_setall(in_dev->arp_parms); 469 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 470 471 return true; 472 } 473 474 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 475 { 476 struct net_device *dev; 477 478 dev = __dev_get_by_name(net, "tunl0"); 479 480 if (dev) { 481 const struct net_device_ops *ops = 
dev->netdev_ops; 482 int err; 483 struct ifreq ifr; 484 struct ip_tunnel_parm p; 485 486 memset(&p, 0, sizeof(p)); 487 p.iph.daddr = v->vifc_rmt_addr.s_addr; 488 p.iph.saddr = v->vifc_lcl_addr.s_addr; 489 p.iph.version = 4; 490 p.iph.ihl = 5; 491 p.iph.protocol = IPPROTO_IPIP; 492 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 493 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 494 495 if (ops->ndo_do_ioctl) { 496 mm_segment_t oldfs = get_fs(); 497 498 set_fs(KERNEL_DS); 499 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 500 set_fs(oldfs); 501 } else { 502 err = -EOPNOTSUPP; 503 } 504 dev = NULL; 505 506 if (err == 0 && 507 (dev = __dev_get_by_name(net, p.name)) != NULL) { 508 dev->flags |= IFF_MULTICAST; 509 if (!ipmr_init_vif_indev(dev)) 510 goto failure; 511 if (dev_open(dev, NULL)) 512 goto failure; 513 dev_hold(dev); 514 } 515 } 516 return dev; 517 518 failure: 519 unregister_netdevice(dev); 520 return NULL; 521 } 522 523 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 524 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 525 { 526 struct net *net = dev_net(dev); 527 struct mr_table *mrt; 528 struct flowi4 fl4 = { 529 .flowi4_oif = dev->ifindex, 530 .flowi4_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 531 .flowi4_mark = skb->mark, 532 }; 533 int err; 534 535 err = ipmr_fib_lookup(net, &fl4, &mrt); 536 if (err < 0) { 537 kfree_skb(skb); 538 return err; 539 } 540 541 read_lock(&mrt_lock); 542 dev->stats.tx_bytes += skb->len; 543 dev->stats.tx_packets++; 544 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); 545 read_unlock(&mrt_lock); 546 kfree_skb(skb); 547 return NETDEV_TX_OK; 548 } 549 550 static int reg_vif_get_iflink(const struct net_device *dev) 551 { 552 return 0; 553 } 554 555 static const struct net_device_ops reg_vif_netdev_ops = { 556 .ndo_start_xmit = reg_vif_xmit, 557 .ndo_get_iflink = reg_vif_get_iflink, 558 }; 559 560 static void reg_vif_setup(struct net_device *dev) 561 { 562 dev->type = ARPHRD_PIMREG; 563 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 564 dev->flags = IFF_NOARP; 565 dev->netdev_ops = ®_vif_netdev_ops; 566 dev->needs_free_netdev = true; 567 dev->features |= NETIF_F_NETNS_LOCAL; 568 } 569 570 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 571 { 572 struct net_device *dev; 573 char name[IFNAMSIZ]; 574 575 if (mrt->id == RT_TABLE_DEFAULT) 576 sprintf(name, "pimreg"); 577 else 578 sprintf(name, "pimreg%u", mrt->id); 579 580 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 581 582 if (!dev) 583 return NULL; 584 585 dev_net_set(dev, net); 586 587 if (register_netdevice(dev)) { 588 free_netdev(dev); 589 return NULL; 590 } 591 592 if (!ipmr_init_vif_indev(dev)) 593 goto failure; 594 if (dev_open(dev, NULL)) 595 goto failure; 596 597 dev_hold(dev); 598 599 return dev; 600 601 failure: 602 unregister_netdevice(dev); 603 return NULL; 604 } 605 606 /* called with rcu_read_lock() */ 607 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 608 unsigned int pimlen) 609 { 610 struct net_device *reg_dev = NULL; 611 struct iphdr *encap; 612 613 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 614 /* Check that: 615 * a. 
packet is really sent to a multicast group 616 * b. packet is not a NULL-REGISTER 617 * c. packet is not truncated 618 */ 619 if (!ipv4_is_multicast(encap->daddr) || 620 encap->tot_len == 0 || 621 ntohs(encap->tot_len) + pimlen > skb->len) 622 return 1; 623 624 read_lock(&mrt_lock); 625 if (mrt->mroute_reg_vif_num >= 0) 626 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 627 read_unlock(&mrt_lock); 628 629 if (!reg_dev) 630 return 1; 631 632 skb->mac_header = skb->network_header; 633 skb_pull(skb, (u8 *)encap - skb->data); 634 skb_reset_network_header(skb); 635 skb->protocol = htons(ETH_P_IP); 636 skb->ip_summed = CHECKSUM_NONE; 637 638 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 639 640 netif_rx(skb); 641 642 return NET_RX_SUCCESS; 643 } 644 #else 645 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 646 { 647 return NULL; 648 } 649 #endif 650 651 static int call_ipmr_vif_entry_notifiers(struct net *net, 652 enum fib_event_type event_type, 653 struct vif_device *vif, 654 vifi_t vif_index, u32 tb_id) 655 { 656 return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, 657 vif, vif_index, tb_id, 658 &net->ipv4.ipmr_seq); 659 } 660 661 static int call_ipmr_mfc_entry_notifiers(struct net *net, 662 enum fib_event_type event_type, 663 struct mfc_cache *mfc, u32 tb_id) 664 { 665 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, 666 &mfc->_c, tb_id, &net->ipv4.ipmr_seq); 667 } 668 669 /** 670 * vif_delete - Delete a VIF entry 671 * @notify: Set to 1, if the caller is a notifier_call 672 */ 673 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 674 struct list_head *head) 675 { 676 struct net *net = read_pnet(&mrt->net); 677 struct vif_device *v; 678 struct net_device *dev; 679 struct in_device *in_dev; 680 681 if (vifi < 0 || vifi >= mrt->maxvif) 682 return -EADDRNOTAVAIL; 683 684 v = &mrt->vif_table[vifi]; 685 686 if (VIF_EXISTS(mrt, vifi)) 687 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, 
vifi, 688 mrt->id); 689 690 write_lock_bh(&mrt_lock); 691 dev = v->dev; 692 v->dev = NULL; 693 694 if (!dev) { 695 write_unlock_bh(&mrt_lock); 696 return -EADDRNOTAVAIL; 697 } 698 699 if (vifi == mrt->mroute_reg_vif_num) 700 mrt->mroute_reg_vif_num = -1; 701 702 if (vifi + 1 == mrt->maxvif) { 703 int tmp; 704 705 for (tmp = vifi - 1; tmp >= 0; tmp--) { 706 if (VIF_EXISTS(mrt, tmp)) 707 break; 708 } 709 mrt->maxvif = tmp+1; 710 } 711 712 write_unlock_bh(&mrt_lock); 713 714 dev_set_allmulti(dev, -1); 715 716 in_dev = __in_dev_get_rtnl(dev); 717 if (in_dev) { 718 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 719 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 720 NETCONFA_MC_FORWARDING, 721 dev->ifindex, &in_dev->cnf); 722 ip_rt_multicast_event(in_dev); 723 } 724 725 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 726 unregister_netdevice_queue(dev, head); 727 728 dev_put(dev); 729 return 0; 730 } 731 732 static void ipmr_cache_free_rcu(struct rcu_head *head) 733 { 734 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 735 736 kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); 737 } 738 739 static void ipmr_cache_free(struct mfc_cache *c) 740 { 741 call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); 742 } 743 744 /* Destroy an unresolved cache entry, killing queued skbs 745 * and reporting error to netlink readers. 
746 */ 747 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 748 { 749 struct net *net = read_pnet(&mrt->net); 750 struct sk_buff *skb; 751 struct nlmsgerr *e; 752 753 atomic_dec(&mrt->cache_resolve_queue_len); 754 755 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { 756 if (ip_hdr(skb)->version == 0) { 757 struct nlmsghdr *nlh = skb_pull(skb, 758 sizeof(struct iphdr)); 759 nlh->nlmsg_type = NLMSG_ERROR; 760 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 761 skb_trim(skb, nlh->nlmsg_len); 762 e = nlmsg_data(nlh); 763 e->error = -ETIMEDOUT; 764 memset(&e->msg, 0, sizeof(e->msg)); 765 766 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 767 } else { 768 kfree_skb(skb); 769 } 770 } 771 772 ipmr_cache_free(c); 773 } 774 775 /* Timer process for the unresolved queue. */ 776 static void ipmr_expire_process(struct timer_list *t) 777 { 778 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 779 struct mr_mfc *c, *next; 780 unsigned long expires; 781 unsigned long now; 782 783 if (!spin_trylock(&mfc_unres_lock)) { 784 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 785 return; 786 } 787 788 if (list_empty(&mrt->mfc_unres_queue)) 789 goto out; 790 791 now = jiffies; 792 expires = 10*HZ; 793 794 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 795 if (time_after(c->mfc_un.unres.expires, now)) { 796 unsigned long interval = c->mfc_un.unres.expires - now; 797 if (interval < expires) 798 expires = interval; 799 continue; 800 } 801 802 list_del(&c->list); 803 mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); 804 ipmr_destroy_unres(mrt, (struct mfc_cache *)c); 805 } 806 807 if (!list_empty(&mrt->mfc_unres_queue)) 808 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 809 810 out: 811 spin_unlock(&mfc_unres_lock); 812 } 813 814 /* Fill oifs list. It is called under write locked mrt_lock. 
*/
/* Rebuild the per-vif TTL thresholds of @cache from @ttls. Every slot is
 * first set to 255; a threshold is recorded only for vifs that exist and
 * whose ttl is in 1..254, and minvif/maxvif are updated to bracket the
 * recorded vifs. lastuse is refreshed to the current jiffies.
 */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

/* Add the vif described by @vifc to @mrt.
 *
 * Depending on vifc_flags the underlying device is a freshly created
 * pimreg device (VIFF_REGISTER), a freshly created DVMRP tunnel
 * (VIFF_TUNNEL), or an existing device looked up by ifindex
 * (VIFF_USE_IFINDEX) or by local address (0). The device is put into
 * allmulti mode, MC_FORWARDING is bumped on its in_device, and the vif
 * slot is published under mrt_lock. A vif added with @mrtsock == 0 is
 * flagged VIFF_STATIC.
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): vifc_vifi indexes vif_table without a range check here;
 * presumably the caller validates it - confirm.
 */
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	struct netdev_phys_item_id ppid = { };
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			/* A device without an IPv4 in_device is unusable. */
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit,
			vifc->vifc_threshold,
			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
			(VIFF_TUNNEL | VIFF_REGISTER));

	/* Record the port parent id when the device has one. */
	err = dev_get_port_parent_id(dev, &ppid, true);
	if (err == 0) {
		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
		v->dev_parent_id.id_len = ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}

	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
/* Look up the resolved cache entry keyed by (@origin, @mcastgrp). */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = htonl(INADDR_ANY)
	};

	/* A wildcard group as well: delegate to the any-parent lookup. */
	if (mcastgrp == htonl(INADDR_ANY))
		return mr_mfc_find_any_parent(mrt, vifi);
	return mr_mfc_find_any(mrt, vifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
/* The entry is zeroed and allocated with GFP_KERNEL; returns NULL on
 * allocation failure. The refcount starts at 1.
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		/* NOTE(review): presumably backdated so that the assert
		 * threshold already counts as elapsed - confirm.
		 */
		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->_c.mfc_un.res.minvif = MAXVIFS;
		c->_c.free = ipmr_cache_free_rcu;
		refcount_set(&c->_c.mfc_un.res.refcount, 1);
	}
	return c;
}

/* Allocate a zeroed cache entry for the unresolved queue with GFP_ATOMIC;
 * its skb queue is initialised and it expires 10 seconds from now.
 * Returns NULL on allocation failure.
 */
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
/* Drains the skbs queued on unresolved entry @uc: queued netlink requests
 * (IP version 0) are answered from resolved entry @c, real packets are
 * forwarded through @c.
 */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				/* Filling failed: turn the reply into an
				 * NLMSG_ERROR carrying -EMSGSIZE.
				 */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 *
 * Builds a message of type @assert from @pkt and queues it on the mroute
 * socket of @mrt. @pkt itself is not consumed. Returns -ENOBUFS when no
 * skb can be allocated, -EINVAL when no mroute socket is attached, and
 * otherwise the result of sock_queue_rcv_skb().
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE)
			msg->im_vif = vifi;
		else
			msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		/* Add our header */
		igmp = skb_put(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
/* @skb is consumed on every path: it is either appended to the pending
 * queue of the matching unresolved entry (created here, and reported to
 * the daemon as IGMPMSG_NOCACHE, if none exists yet) or freed. Returns 0
 * when queued, -ENOBUFS when the table already has 10 unresolved entries,
 * the allocation fails or the entry's queue is full, or the error from
 * ipmr_cache_report().
 */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);

		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		/* First unresolved entry: arm the expiry timer. */
		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

/* Delete the resolved entry matching @mfc's (origin, group) and @parent:
 * remove it from the hash and the cache list, notify listeners and drop
 * the table's reference. Returns 0, or -ENOENT if no such entry exists.
 */
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
	list_del_rcu(&c->_c.list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);

	return 0;
}

/* Add or update the resolved entry described by @mfc.
 *
 * An existing entry (same origin, group and @parent) has its parent vif
 * and TTL thresholds replaced under mrt_lock. Otherwise a new entry is
 * allocated, inserted into the hash and the cache list, and any matching
 * unresolved entry is resolved through it and freed. An entry added with
 * @mrtsock == 0 is flagged MFC_STATIC.
 *
 * Returns 0 on success, -ENFILE when mfcc_parent >= MAXVIFS, -EINVAL for
 * a group that is neither INADDR_ANY nor multicast, -ENOMEM on allocation
 * failure, or the error from the hash insertion.
 */
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->_c.mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc_cache *)_uc;
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left to expire: stop the timer. */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct net *net = read_pnet(&mrt->net);
	struct mr_mfc *c, *tmp;
	struct mfc_cache *cache;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
				continue;
			vif_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
			list_del_rcu(&c->list);
			cache = (struct mfc_cache *)c;
			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
						      mrt->id);
mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1332 mr_cache_put(c); 1333 } 1334 } 1335 1336 if (flags & MRT_FLUSH_MFC) { 1337 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1338 spin_lock_bh(&mfc_unres_lock); 1339 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1340 list_del(&c->list); 1341 cache = (struct mfc_cache *)c; 1342 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1343 ipmr_destroy_unres(mrt, cache); 1344 } 1345 spin_unlock_bh(&mfc_unres_lock); 1346 } 1347 } 1348 } 1349 1350 /* called from ip_ra_control(), before an RCU grace period, 1351 * we dont need to call synchronize_rcu() here 1352 */ 1353 static void mrtsock_destruct(struct sock *sk) 1354 { 1355 struct net *net = sock_net(sk); 1356 struct mr_table *mrt; 1357 1358 rtnl_lock(); 1359 ipmr_for_each_table(mrt, net) { 1360 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1361 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1362 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1363 NETCONFA_MC_FORWARDING, 1364 NETCONFA_IFINDEX_ALL, 1365 net->ipv4.devconf_all); 1366 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1367 mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); 1368 } 1369 } 1370 rtnl_unlock(); 1371 } 1372 1373 /* Socket options and virtual interface manipulation. The whole 1374 * virtual interface system is a complete heap, but unfortunately 1375 * that's how BSD mrouted happens to think. Maybe one day with a proper 1376 * MOSPF/PIM router set up we can clean this up. 
1377 */ 1378 1379 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, 1380 unsigned int optlen) 1381 { 1382 struct net *net = sock_net(sk); 1383 int val, ret = 0, parent = 0; 1384 struct mr_table *mrt; 1385 struct vifctl vif; 1386 struct mfcctl mfc; 1387 bool do_wrvifwhole; 1388 u32 uval; 1389 1390 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1391 rtnl_lock(); 1392 if (sk->sk_type != SOCK_RAW || 1393 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1394 ret = -EOPNOTSUPP; 1395 goto out_unlock; 1396 } 1397 1398 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1399 if (!mrt) { 1400 ret = -ENOENT; 1401 goto out_unlock; 1402 } 1403 if (optname != MRT_INIT) { 1404 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1405 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1406 ret = -EACCES; 1407 goto out_unlock; 1408 } 1409 } 1410 1411 switch (optname) { 1412 case MRT_INIT: 1413 if (optlen != sizeof(int)) { 1414 ret = -EINVAL; 1415 break; 1416 } 1417 if (rtnl_dereference(mrt->mroute_sk)) { 1418 ret = -EADDRINUSE; 1419 break; 1420 } 1421 1422 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1423 if (ret == 0) { 1424 rcu_assign_pointer(mrt->mroute_sk, sk); 1425 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1426 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1427 NETCONFA_MC_FORWARDING, 1428 NETCONFA_IFINDEX_ALL, 1429 net->ipv4.devconf_all); 1430 } 1431 break; 1432 case MRT_DONE: 1433 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1434 ret = -EACCES; 1435 } else { 1436 /* We need to unlock here because mrtsock_destruct takes 1437 * care of rtnl itself and we can't change that due to 1438 * the IP_ROUTER_ALERT setsockopt which runs without it. 
1439 */ 1440 rtnl_unlock(); 1441 ret = ip_ra_control(sk, 0, NULL); 1442 goto out; 1443 } 1444 break; 1445 case MRT_ADD_VIF: 1446 case MRT_DEL_VIF: 1447 if (optlen != sizeof(vif)) { 1448 ret = -EINVAL; 1449 break; 1450 } 1451 if (copy_from_user(&vif, optval, sizeof(vif))) { 1452 ret = -EFAULT; 1453 break; 1454 } 1455 if (vif.vifc_vifi >= MAXVIFS) { 1456 ret = -ENFILE; 1457 break; 1458 } 1459 if (optname == MRT_ADD_VIF) { 1460 ret = vif_add(net, mrt, &vif, 1461 sk == rtnl_dereference(mrt->mroute_sk)); 1462 } else { 1463 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1464 } 1465 break; 1466 /* Manipulate the forwarding caches. These live 1467 * in a sort of kernel/user symbiosis. 1468 */ 1469 case MRT_ADD_MFC: 1470 case MRT_DEL_MFC: 1471 parent = -1; 1472 /* fall through */ 1473 case MRT_ADD_MFC_PROXY: 1474 case MRT_DEL_MFC_PROXY: 1475 if (optlen != sizeof(mfc)) { 1476 ret = -EINVAL; 1477 break; 1478 } 1479 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1480 ret = -EFAULT; 1481 break; 1482 } 1483 if (parent == 0) 1484 parent = mfc.mfcc_parent; 1485 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1486 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1487 else 1488 ret = ipmr_mfc_add(net, mrt, &mfc, 1489 sk == rtnl_dereference(mrt->mroute_sk), 1490 parent); 1491 break; 1492 case MRT_FLUSH: 1493 if (optlen != sizeof(val)) { 1494 ret = -EINVAL; 1495 break; 1496 } 1497 if (get_user(val, (int __user *)optval)) { 1498 ret = -EFAULT; 1499 break; 1500 } 1501 mroute_clean_tables(mrt, val); 1502 break; 1503 /* Control PIM assert. 
*/ 1504 case MRT_ASSERT: 1505 if (optlen != sizeof(val)) { 1506 ret = -EINVAL; 1507 break; 1508 } 1509 if (get_user(val, (int __user *)optval)) { 1510 ret = -EFAULT; 1511 break; 1512 } 1513 mrt->mroute_do_assert = val; 1514 break; 1515 case MRT_PIM: 1516 if (!ipmr_pimsm_enabled()) { 1517 ret = -ENOPROTOOPT; 1518 break; 1519 } 1520 if (optlen != sizeof(val)) { 1521 ret = -EINVAL; 1522 break; 1523 } 1524 if (get_user(val, (int __user *)optval)) { 1525 ret = -EFAULT; 1526 break; 1527 } 1528 1529 do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); 1530 val = !!val; 1531 if (val != mrt->mroute_do_pim) { 1532 mrt->mroute_do_pim = val; 1533 mrt->mroute_do_assert = val; 1534 mrt->mroute_do_wrvifwhole = do_wrvifwhole; 1535 } 1536 break; 1537 case MRT_TABLE: 1538 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1539 ret = -ENOPROTOOPT; 1540 break; 1541 } 1542 if (optlen != sizeof(uval)) { 1543 ret = -EINVAL; 1544 break; 1545 } 1546 if (get_user(uval, (u32 __user *)optval)) { 1547 ret = -EFAULT; 1548 break; 1549 } 1550 1551 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1552 ret = -EBUSY; 1553 } else { 1554 mrt = ipmr_new_table(net, uval); 1555 if (IS_ERR(mrt)) 1556 ret = PTR_ERR(mrt); 1557 else 1558 raw_sk(sk)->ipmr_table = uval; 1559 } 1560 break; 1561 /* Spurious command, or MRT_VERSION which you cannot set. */ 1562 default: 1563 ret = -ENOPROTOOPT; 1564 } 1565 out_unlock: 1566 rtnl_unlock(); 1567 out: 1568 return ret; 1569 } 1570 1571 /* Getsock opt support for the multicast routing system. */ 1572 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1573 { 1574 int olr; 1575 int val; 1576 struct net *net = sock_net(sk); 1577 struct mr_table *mrt; 1578 1579 if (sk->sk_type != SOCK_RAW || 1580 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1581 return -EOPNOTSUPP; 1582 1583 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1584 if (!mrt) 1585 return -ENOENT; 1586 1587 switch (optname) { 1588 case MRT_VERSION: 1589 val = 0x0305; 1590 break; 1591 case MRT_PIM: 1592 if (!ipmr_pimsm_enabled()) 1593 return -ENOPROTOOPT; 1594 val = mrt->mroute_do_pim; 1595 break; 1596 case MRT_ASSERT: 1597 val = mrt->mroute_do_assert; 1598 break; 1599 default: 1600 return -ENOPROTOOPT; 1601 } 1602 1603 if (get_user(olr, optlen)) 1604 return -EFAULT; 1605 olr = min_t(unsigned int, olr, sizeof(int)); 1606 if (olr < 0) 1607 return -EINVAL; 1608 if (put_user(olr, optlen)) 1609 return -EFAULT; 1610 if (copy_to_user(optval, &val, olr)) 1611 return -EFAULT; 1612 return 0; 1613 } 1614 1615 /* The IP multicast ioctl support routines. */ 1616 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1617 { 1618 struct sioc_sg_req sr; 1619 struct sioc_vif_req vr; 1620 struct vif_device *vif; 1621 struct mfc_cache *c; 1622 struct net *net = sock_net(sk); 1623 struct mr_table *mrt; 1624 1625 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1626 if (!mrt) 1627 return -ENOENT; 1628 1629 switch (cmd) { 1630 case SIOCGETVIFCNT: 1631 if (copy_from_user(&vr, arg, sizeof(vr))) 1632 return -EFAULT; 1633 if (vr.vifi >= mrt->maxvif) 1634 return -EINVAL; 1635 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1636 read_lock(&mrt_lock); 1637 vif = &mrt->vif_table[vr.vifi]; 1638 if (VIF_EXISTS(mrt, vr.vifi)) { 1639 vr.icount = vif->pkt_in; 1640 vr.ocount = vif->pkt_out; 1641 vr.ibytes = vif->bytes_in; 1642 vr.obytes = vif->bytes_out; 1643 read_unlock(&mrt_lock); 1644 1645 if (copy_to_user(arg, &vr, sizeof(vr))) 1646 return -EFAULT; 1647 return 0; 1648 } 1649 read_unlock(&mrt_lock); 1650 return -EADDRNOTAVAIL; 1651 case SIOCGETSGCNT: 1652 if (copy_from_user(&sr, arg, sizeof(sr))) 1653 return -EFAULT; 1654 1655 rcu_read_lock(); 1656 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1657 if (c) { 1658 sr.pktcnt = c->_c.mfc_un.res.pkt; 1659 sr.bytecnt = c->_c.mfc_un.res.bytes; 1660 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1661 rcu_read_unlock(); 1662 1663 if (copy_to_user(arg, &sr, sizeof(sr))) 1664 return -EFAULT; 1665 return 0; 1666 } 1667 rcu_read_unlock(); 1668 return -EADDRNOTAVAIL; 1669 default: 1670 return -ENOIOCTLCMD; 1671 } 1672 } 1673 1674 #ifdef CONFIG_COMPAT 1675 struct compat_sioc_sg_req { 1676 struct in_addr src; 1677 struct in_addr grp; 1678 compat_ulong_t pktcnt; 1679 compat_ulong_t bytecnt; 1680 compat_ulong_t wrong_if; 1681 }; 1682 1683 struct compat_sioc_vif_req { 1684 vifi_t vifi; /* Which iface */ 1685 compat_ulong_t icount; 1686 compat_ulong_t ocount; 1687 compat_ulong_t ibytes; 1688 compat_ulong_t obytes; 1689 }; 1690 1691 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1692 { 1693 struct compat_sioc_sg_req sr; 1694 struct compat_sioc_vif_req vr; 1695 struct vif_device *vif; 1696 struct mfc_cache *c; 1697 struct net *net = sock_net(sk); 1698 struct mr_table *mrt; 1699 1700 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1701 if (!mrt) 1702 return -ENOENT; 1703 1704 switch (cmd) { 1705 case SIOCGETVIFCNT: 1706 if (copy_from_user(&vr, arg, sizeof(vr))) 1707 return -EFAULT; 1708 if (vr.vifi >= mrt->maxvif) 1709 return -EINVAL; 1710 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1711 read_lock(&mrt_lock); 1712 vif = &mrt->vif_table[vr.vifi]; 1713 if (VIF_EXISTS(mrt, vr.vifi)) { 1714 vr.icount = vif->pkt_in; 1715 vr.ocount = vif->pkt_out; 1716 vr.ibytes = vif->bytes_in; 1717 vr.obytes = vif->bytes_out; 1718 read_unlock(&mrt_lock); 1719 1720 if (copy_to_user(arg, &vr, sizeof(vr))) 1721 return -EFAULT; 1722 return 0; 1723 } 1724 read_unlock(&mrt_lock); 1725 return -EADDRNOTAVAIL; 1726 case SIOCGETSGCNT: 1727 if (copy_from_user(&sr, arg, sizeof(sr))) 1728 return -EFAULT; 1729 1730 rcu_read_lock(); 1731 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1732 if (c) { 1733 sr.pktcnt = c->_c.mfc_un.res.pkt; 1734 sr.bytecnt = c->_c.mfc_un.res.bytes; 1735 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1736 rcu_read_unlock(); 1737 1738 if (copy_to_user(arg, &sr, sizeof(sr))) 1739 return -EFAULT; 1740 return 0; 1741 } 1742 rcu_read_unlock(); 1743 return -EADDRNOTAVAIL; 1744 default: 1745 return -ENOIOCTLCMD; 1746 } 1747 } 1748 #endif 1749 1750 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1751 { 1752 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1753 struct net *net = dev_net(dev); 1754 struct mr_table *mrt; 1755 struct vif_device *v; 1756 int ct; 1757 1758 if (event != NETDEV_UNREGISTER) 1759 return NOTIFY_DONE; 1760 1761 ipmr_for_each_table(mrt, net) { 1762 v = &mrt->vif_table[0]; 1763 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1764 if (v->dev == dev) 1765 vif_delete(mrt, ct, 1, NULL); 1766 } 1767 } 1768 return NOTIFY_DONE; 1769 } 1770 1771 static struct notifier_block ip_mr_notifier = { 1772 .notifier_call = ipmr_device_event, 1773 }; 1774 1775 /* Encapsulate a packet by attaching a valid IPIP header to it. 
1776 * This avoids tunnel drivers and other mess and gives us the speed so 1777 * important for multicast video. 1778 */ 1779 static void ip_encap(struct net *net, struct sk_buff *skb, 1780 __be32 saddr, __be32 daddr) 1781 { 1782 struct iphdr *iph; 1783 const struct iphdr *old_iph = ip_hdr(skb); 1784 1785 skb_push(skb, sizeof(struct iphdr)); 1786 skb->transport_header = skb->network_header; 1787 skb_reset_network_header(skb); 1788 iph = ip_hdr(skb); 1789 1790 iph->version = 4; 1791 iph->tos = old_iph->tos; 1792 iph->ttl = old_iph->ttl; 1793 iph->frag_off = 0; 1794 iph->daddr = daddr; 1795 iph->saddr = saddr; 1796 iph->protocol = IPPROTO_IPIP; 1797 iph->ihl = 5; 1798 iph->tot_len = htons(skb->len); 1799 ip_select_ident(net, skb, NULL); 1800 ip_send_check(iph); 1801 1802 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1803 nf_reset(skb); 1804 } 1805 1806 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1807 struct sk_buff *skb) 1808 { 1809 struct ip_options *opt = &(IPCB(skb)->opt); 1810 1811 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1812 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1813 1814 if (unlikely(opt->optlen)) 1815 ip_forward_options(skb); 1816 1817 return dst_output(net, sk, skb); 1818 } 1819 1820 #ifdef CONFIG_NET_SWITCHDEV 1821 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1822 int in_vifi, int out_vifi) 1823 { 1824 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1825 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1826 1827 if (!skb->offload_l3_fwd_mark) 1828 return false; 1829 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1830 return false; 1831 return netdev_phys_item_id_same(&out_vif->dev_parent_id, 1832 &in_vif->dev_parent_id); 1833 } 1834 #else 1835 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1836 int in_vifi, int out_vifi) 1837 { 1838 return false; 1839 } 1840 #endif 1841 1842 /* Processing handlers for 
ipmr_forward */ 1843 1844 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1845 int in_vifi, struct sk_buff *skb, int vifi) 1846 { 1847 const struct iphdr *iph = ip_hdr(skb); 1848 struct vif_device *vif = &mrt->vif_table[vifi]; 1849 struct net_device *dev; 1850 struct rtable *rt; 1851 struct flowi4 fl4; 1852 int encap = 0; 1853 1854 if (!vif->dev) 1855 goto out_free; 1856 1857 if (vif->flags & VIFF_REGISTER) { 1858 vif->pkt_out++; 1859 vif->bytes_out += skb->len; 1860 vif->dev->stats.tx_bytes += skb->len; 1861 vif->dev->stats.tx_packets++; 1862 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1863 goto out_free; 1864 } 1865 1866 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1867 goto out_free; 1868 1869 if (vif->flags & VIFF_TUNNEL) { 1870 rt = ip_route_output_ports(net, &fl4, NULL, 1871 vif->remote, vif->local, 1872 0, 0, 1873 IPPROTO_IPIP, 1874 RT_TOS(iph->tos), vif->link); 1875 if (IS_ERR(rt)) 1876 goto out_free; 1877 encap = sizeof(struct iphdr); 1878 } else { 1879 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1880 0, 0, 1881 IPPROTO_IPIP, 1882 RT_TOS(iph->tos), vif->link); 1883 if (IS_ERR(rt)) 1884 goto out_free; 1885 } 1886 1887 dev = rt->dst.dev; 1888 1889 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1890 /* Do not fragment multicasts. Alas, IPv4 does not 1891 * allow to send ICMP, so that packets will disappear 1892 * to blackhole. 1893 */ 1894 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1895 ip_rt_put(rt); 1896 goto out_free; 1897 } 1898 1899 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; 1900 1901 if (skb_cow(skb, encap)) { 1902 ip_rt_put(rt); 1903 goto out_free; 1904 } 1905 1906 vif->pkt_out++; 1907 vif->bytes_out += skb->len; 1908 1909 skb_dst_drop(skb); 1910 skb_dst_set(skb, &rt->dst); 1911 ip_decrease_ttl(ip_hdr(skb)); 1912 1913 /* FIXME: forward and output firewalls used to be called here. 1914 * What do we do with netfilter? 
-- RR 1915 */ 1916 if (vif->flags & VIFF_TUNNEL) { 1917 ip_encap(net, skb, vif->local, vif->remote); 1918 /* FIXME: extra output firewall step used to be here. --RR */ 1919 vif->dev->stats.tx_packets++; 1920 vif->dev->stats.tx_bytes += skb->len; 1921 } 1922 1923 IPCB(skb)->flags |= IPSKB_FORWARDED; 1924 1925 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1926 * not only before forwarding, but after forwarding on all output 1927 * interfaces. It is clear, if mrouter runs a multicasting 1928 * program, it should receive packets not depending to what interface 1929 * program is joined. 1930 * If we will not make it, the program will have to join on all 1931 * interfaces. On the other hand, multihoming host (or router, but 1932 * not mrouter) cannot join to more than one interface - it will 1933 * result in receiving multiple packets. 1934 */ 1935 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, 1936 net, NULL, skb, skb->dev, dev, 1937 ipmr_forward_finish); 1938 return; 1939 1940 out_free: 1941 kfree_skb(skb); 1942 } 1943 1944 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) 1945 { 1946 int ct; 1947 1948 for (ct = mrt->maxvif-1; ct >= 0; ct--) { 1949 if (mrt->vif_table[ct].dev == dev) 1950 break; 1951 } 1952 return ct; 1953 } 1954 1955 /* "local" means that we should preserve one skb (for local delivery) */ 1956 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 1957 struct net_device *dev, struct sk_buff *skb, 1958 struct mfc_cache *c, int local) 1959 { 1960 int true_vifi = ipmr_find_vif(mrt, dev); 1961 int psend = -1; 1962 int vif, ct; 1963 1964 vif = c->_c.mfc_parent; 1965 c->_c.mfc_un.res.pkt++; 1966 c->_c.mfc_un.res.bytes += skb->len; 1967 c->_c.mfc_un.res.lastuse = jiffies; 1968 1969 if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { 1970 struct mfc_cache *cache_proxy; 1971 1972 /* For an (*,G) entry, we only check that the incomming 1973 * interface is part of the static tree. 
1974 */ 1975 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 1976 if (cache_proxy && 1977 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) 1978 goto forward; 1979 } 1980 1981 /* Wrong interface: drop packet and (maybe) send PIM assert. */ 1982 if (mrt->vif_table[vif].dev != dev) { 1983 if (rt_is_output_route(skb_rtable(skb))) { 1984 /* It is our own packet, looped back. 1985 * Very complicated situation... 1986 * 1987 * The best workaround until routing daemons will be 1988 * fixed is not to redistribute packet, if it was 1989 * send through wrong interface. It means, that 1990 * multicast applications WILL NOT work for 1991 * (S,G), which have default multicast route pointing 1992 * to wrong oif. In any case, it is not a good 1993 * idea to use multicasting applications on router. 1994 */ 1995 goto dont_forward; 1996 } 1997 1998 c->_c.mfc_un.res.wrong_if++; 1999 2000 if (true_vifi >= 0 && mrt->mroute_do_assert && 2001 /* pimsm uses asserts, when switching from RPT to SPT, 2002 * so that we cannot check that packet arrived on an oif. 2003 * It is bad, but otherwise we would need to move pretty 2004 * large chunk of pimd to kernel. Ough... 
--ANK 2005 */ 2006 (mrt->mroute_do_pim || 2007 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2008 time_after(jiffies, 2009 c->_c.mfc_un.res.last_assert + 2010 MFC_ASSERT_THRESH)) { 2011 c->_c.mfc_un.res.last_assert = jiffies; 2012 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 2013 if (mrt->mroute_do_wrvifwhole) 2014 ipmr_cache_report(mrt, skb, true_vifi, 2015 IGMPMSG_WRVIFWHOLE); 2016 } 2017 goto dont_forward; 2018 } 2019 2020 forward: 2021 mrt->vif_table[vif].pkt_in++; 2022 mrt->vif_table[vif].bytes_in += skb->len; 2023 2024 /* Forward the frame */ 2025 if (c->mfc_origin == htonl(INADDR_ANY) && 2026 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2027 if (true_vifi >= 0 && 2028 true_vifi != c->_c.mfc_parent && 2029 ip_hdr(skb)->ttl > 2030 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2031 /* It's an (*,*) entry and the packet is not coming from 2032 * the upstream: forward the packet to the upstream 2033 * only. 2034 */ 2035 psend = c->_c.mfc_parent; 2036 goto last_forward; 2037 } 2038 goto dont_forward; 2039 } 2040 for (ct = c->_c.mfc_un.res.maxvif - 1; 2041 ct >= c->_c.mfc_un.res.minvif; ct--) { 2042 /* For (*,G) entry, don't forward to the incoming interface */ 2043 if ((c->mfc_origin != htonl(INADDR_ANY) || 2044 ct != true_vifi) && 2045 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2046 if (psend != -1) { 2047 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2048 2049 if (skb2) 2050 ipmr_queue_xmit(net, mrt, true_vifi, 2051 skb2, psend); 2052 } 2053 psend = ct; 2054 } 2055 } 2056 last_forward: 2057 if (psend != -1) { 2058 if (local) { 2059 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2060 2061 if (skb2) 2062 ipmr_queue_xmit(net, mrt, true_vifi, skb2, 2063 psend); 2064 } else { 2065 ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); 2066 return; 2067 } 2068 } 2069 2070 dont_forward: 2071 if (!local) 2072 kfree_skb(skb); 2073 } 2074 2075 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2076 { 2077 struct rtable *rt = 
skb_rtable(skb); 2078 struct iphdr *iph = ip_hdr(skb); 2079 struct flowi4 fl4 = { 2080 .daddr = iph->daddr, 2081 .saddr = iph->saddr, 2082 .flowi4_tos = RT_TOS(iph->tos), 2083 .flowi4_oif = (rt_is_output_route(rt) ? 2084 skb->dev->ifindex : 0), 2085 .flowi4_iif = (rt_is_output_route(rt) ? 2086 LOOPBACK_IFINDEX : 2087 skb->dev->ifindex), 2088 .flowi4_mark = skb->mark, 2089 }; 2090 struct mr_table *mrt; 2091 int err; 2092 2093 err = ipmr_fib_lookup(net, &fl4, &mrt); 2094 if (err) 2095 return ERR_PTR(err); 2096 return mrt; 2097 } 2098 2099 /* Multicast packets for forwarding arrive here 2100 * Called with rcu_read_lock(); 2101 */ 2102 int ip_mr_input(struct sk_buff *skb) 2103 { 2104 struct mfc_cache *cache; 2105 struct net *net = dev_net(skb->dev); 2106 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2107 struct mr_table *mrt; 2108 struct net_device *dev; 2109 2110 /* skb->dev passed in is the loX master dev for vrfs. 2111 * As there are no vifs associated with loopback devices, 2112 * get the proper interface that does have a vif associated with it. 2113 */ 2114 dev = skb->dev; 2115 if (netif_is_l3_master(skb->dev)) { 2116 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2117 if (!dev) { 2118 kfree_skb(skb); 2119 return -ENODEV; 2120 } 2121 } 2122 2123 /* Packet is looped back after forward, it should not be 2124 * forwarded second time, but still can be delivered locally. 2125 */ 2126 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2127 goto dont_forward; 2128 2129 mrt = ipmr_rt_fib_lookup(net, skb); 2130 if (IS_ERR(mrt)) { 2131 kfree_skb(skb); 2132 return PTR_ERR(mrt); 2133 } 2134 if (!local) { 2135 if (IPCB(skb)->opt.router_alert) { 2136 if (ip_call_ra_chain(skb)) 2137 return 0; 2138 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2139 /* IGMPv1 (and broken IGMPv2 implementations sort of 2140 * Cisco IOS <= 11.2(8)) do not put router alert 2141 * option to IGMP packets destined to routable 2142 * groups. 
It is very bad, because it means 2143 * that we can forward NO IGMP messages. 2144 */ 2145 struct sock *mroute_sk; 2146 2147 mroute_sk = rcu_dereference(mrt->mroute_sk); 2148 if (mroute_sk) { 2149 nf_reset(skb); 2150 raw_rcv(mroute_sk, skb); 2151 return 0; 2152 } 2153 } 2154 } 2155 2156 /* already under rcu_read_lock() */ 2157 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2158 if (!cache) { 2159 int vif = ipmr_find_vif(mrt, dev); 2160 2161 if (vif >= 0) 2162 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2163 vif); 2164 } 2165 2166 /* No usable cache entry */ 2167 if (!cache) { 2168 int vif; 2169 2170 if (local) { 2171 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2172 ip_local_deliver(skb); 2173 if (!skb2) 2174 return -ENOBUFS; 2175 skb = skb2; 2176 } 2177 2178 read_lock(&mrt_lock); 2179 vif = ipmr_find_vif(mrt, dev); 2180 if (vif >= 0) { 2181 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2182 read_unlock(&mrt_lock); 2183 2184 return err2; 2185 } 2186 read_unlock(&mrt_lock); 2187 kfree_skb(skb); 2188 return -ENODEV; 2189 } 2190 2191 read_lock(&mrt_lock); 2192 ip_mr_forward(net, mrt, dev, skb, cache, local); 2193 read_unlock(&mrt_lock); 2194 2195 if (local) 2196 return ip_local_deliver(skb); 2197 2198 return 0; 2199 2200 dont_forward: 2201 if (local) 2202 return ip_local_deliver(skb); 2203 kfree_skb(skb); 2204 return 0; 2205 } 2206 2207 #ifdef CONFIG_IP_PIMSM_V1 2208 /* Handle IGMP messages of PIMv1 */ 2209 int pim_rcv_v1(struct sk_buff *skb) 2210 { 2211 struct igmphdr *pim; 2212 struct net *net = dev_net(skb->dev); 2213 struct mr_table *mrt; 2214 2215 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2216 goto drop; 2217 2218 pim = igmp_hdr(skb); 2219 2220 mrt = ipmr_rt_fib_lookup(net, skb); 2221 if (IS_ERR(mrt)) 2222 goto drop; 2223 if (!mrt->mroute_do_pim || 2224 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2225 goto drop; 2226 2227 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2228 drop: 2229 
kfree_skb(skb); 2230 } 2231 return 0; 2232 } 2233 #endif 2234 2235 #ifdef CONFIG_IP_PIMSM_V2 2236 static int pim_rcv(struct sk_buff *skb) 2237 { 2238 struct pimreghdr *pim; 2239 struct net *net = dev_net(skb->dev); 2240 struct mr_table *mrt; 2241 2242 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2243 goto drop; 2244 2245 pim = (struct pimreghdr *)skb_transport_header(skb); 2246 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2247 (pim->flags & PIM_NULL_REGISTER) || 2248 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2249 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2250 goto drop; 2251 2252 mrt = ipmr_rt_fib_lookup(net, skb); 2253 if (IS_ERR(mrt)) 2254 goto drop; 2255 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2256 drop: 2257 kfree_skb(skb); 2258 } 2259 return 0; 2260 } 2261 #endif 2262 2263 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2264 __be32 saddr, __be32 daddr, 2265 struct rtmsg *rtm, u32 portid) 2266 { 2267 struct mfc_cache *cache; 2268 struct mr_table *mrt; 2269 int err; 2270 2271 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2272 if (!mrt) 2273 return -ENOENT; 2274 2275 rcu_read_lock(); 2276 cache = ipmr_cache_find(mrt, saddr, daddr); 2277 if (!cache && skb->dev) { 2278 int vif = ipmr_find_vif(mrt, skb->dev); 2279 2280 if (vif >= 0) 2281 cache = ipmr_cache_find_any(mrt, daddr, vif); 2282 } 2283 if (!cache) { 2284 struct sk_buff *skb2; 2285 struct iphdr *iph; 2286 struct net_device *dev; 2287 int vif = -1; 2288 2289 dev = skb->dev; 2290 read_lock(&mrt_lock); 2291 if (dev) 2292 vif = ipmr_find_vif(mrt, dev); 2293 if (vif < 0) { 2294 read_unlock(&mrt_lock); 2295 rcu_read_unlock(); 2296 return -ENODEV; 2297 } 2298 skb2 = skb_clone(skb, GFP_ATOMIC); 2299 if (!skb2) { 2300 read_unlock(&mrt_lock); 2301 rcu_read_unlock(); 2302 return -ENOMEM; 2303 } 2304 2305 NETLINK_CB(skb2).portid = portid; 2306 skb_push(skb2, sizeof(struct iphdr)); 2307 skb_reset_network_header(skb2); 2308 iph = ip_hdr(skb2); 2309 iph->ihl = 
sizeof(struct iphdr) >> 2;
		iph->saddr = saddr;
		iph->daddr = daddr;
		/* NOTE(review): 0 is not a real IP version; presumably this marks
		 * the header as synthetic for whoever consumes skb2 - confirm.
		 */
		iph->version = 0;
		/* Hand skb2 to the unresolved-entry path, then drop mrt_lock and
		 * leave the RCU read section before returning its result.
		 */
		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
		read_unlock(&mrt_lock);
		rcu_read_unlock();
		return err;
	}

	/* mr_fill_mroute() is called with mrt_lock held for reading. */
	read_lock(&mrt_lock);
	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	rcu_read_unlock();
	return err;
}

/* Append one route message describing MFC entry @c of table @mrt to @skb.
 *
 * The message is a netlink header of type @cmd (carrying @portid, @seq and
 * @flags), a struct rtmsg, the RTA_TABLE, RTA_SRC and RTA_DST attributes,
 * and whatever mr_fill_mroute() adds.
 *
 * Returns 0 on success and -EMSGSIZE on every failure; once the header has
 * been added, a failure also cancels the partially built message.
 */
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
			    int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	/* The table id is carried both in the header field and as an
	 * RTA_TABLE attribute.
	 */
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Entries flagged MFC_STATIC are reported as RTPROT_STATIC, all
	 * others as RTPROT_MROUTED.
	 */
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Adapter with a struct mr_mfc based signature: casts @c to
 * struct mfc_cache and forwards every argument to ipmr_fill_mroute().
 * This is the fill callback handed to mr_table_dump() and
 * mr_rtm_dumproute() in ipmr_rtm_dumproute().
 */
static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
			     int flags)
{
	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
				cmd, flags);
}

/* Size of the buffer to allocate for one route message.
 *
 * @unresolved: when true, only the rtmsg header plus RTA_TABLE, RTA_SRC and
 *		RTA_DST are accounted for.
 * @maxvif:	number of struct rtnexthop slots reserved inside
 *		RTA_MULTIPATH when @unresolved is false.
 */
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(4)	/* RTA_SRC */
		+ nla_total_size(4)	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

/* Send a route message of type @cmd for @mfc to the RTNLGRP_IPV4_MROUTE
 * multicast group of the table's network namespace.
 *
 * Allocations use GFP_ATOMIC.  Nothing is returned; a failure is reported
 * to the group's listeners through rtnl_set_sk_err().
 */
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	/* Error reported when the allocation below fails. */
	int err = -ENOBUFS;

	/* An entry whose mfc_parent is >= MAXVIFS is sized as unresolved. */
	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
				       mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	/* skb is NULL here when the allocation itself failed. */
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}

/* Size of the buffer to allocate for one cache report message carrying
 * @payloadlen bytes in IPMRA_CREPORT_PKT.
 */
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IPMRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
					/* IPMRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

/* Send an RTM_NEWCACHEREPORT message built from @pkt to the
 * RTNLGRP_IPV4_MROUTE_R multicast group.
 *
 * The struct igmpmsg is read at @pkt's network header; the message type,
 * vif id, source and destination are copied from it, and the bytes of @pkt
 * from offset sizeof(struct igmpmsg) onwards go into IPMRA_CREPORT_PKT.
 * Every failure is reported to the group as -ENOBUFS.
 */
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* NOTE(review): assumes pkt->len >= sizeof(struct igmpmsg); there is
	 * no check here - TODO confirm the callers guarantee it.
	 */
	payloadlen = pkt->len - sizeof(struct igmpmsg);
	msg = (struct igmpmsg *)skb_network_header(pkt);

	skb =
nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
	/* Fixed-size fields taken from the struct igmpmsg in the packet. */
	if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
	    nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
			    msg->im_src.s_addr) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
			    msg->im_dst.s_addr))
		goto nla_put_failure;

	/* Reserve the attribute first, then copy the payload straight into
	 * it from pkt, starting after the struct igmpmsg.
	 */
	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	/* skb is NULL here when the allocation itself failed. */
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
}

/* Validate a multicast route get request and parse its attributes into @tb.
 *
 * A message shorter than a struct rtmsg is always rejected.  When
 * netlink_strict_get_check() is false for @skb, the attributes are simply
 * parsed with nlmsg_parse().  Otherwise:
 *  - rtm_src_len and rtm_dst_len must each be 0 or 32, and rtm_tos,
 *    rtm_table, rtm_protocol, rtm_scope, rtm_type and rtm_flags must be 0;
 *  - the attributes are parsed with nlmsg_parse_strict();
 *  - RTA_SRC / RTA_DST need a non-zero rtm_src_len / rtm_dst_len;
 *  - only RTA_SRC, RTA_DST and RTA_TABLE may be present.
 *
 * Returns 0 on success or a negative errno; rejections made here are
 * -EINVAL with a message stored in @extack.
 */
static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
		return -EINVAL;
	}

	/* Non-strict path: no header or attribute restrictions beyond the
	 * policy applied by nlmsg_parse().
	 */
	if (!netlink_strict_get_check(skb))
		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
				   rtm_ipv4_policy, extack);

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
		return -EINVAL;
	}

	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
				 rtm_ipv4_policy, extack);
	if (err)
		return err;

	/* An address attribute without the matching prefix length (already
	 * restricted to 0 or 32 above) is rejected.
	 */
	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
		return -EINVAL;
	}

	/* Reject every parsed attribute this request type does not use. */
	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_SRC:
		case RTA_DST:
		case RTA_TABLE:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
			return -EINVAL;
		}
	}

	return 0;
}

/* Handler registered for RTM_GETROUTE in ip_mr_init(): looks up the cache
 * entry for the requested (source, group) pair and unicasts it back to the
 * requester as an RTM_NEWROUTE message.
 *
 * Absent RTA_SRC, RTA_DST or RTA_TABLE attributes are treated as 0, and a
 * table id of 0 selects RT_TABLE_DEFAULT.
 *
 * Returns a negative errno when validation, the table or entry lookup
 * (-ENOENT), the allocation (-ENOBUFS) or the fill fails; otherwise the
 * result of rtnl_unicast().
 */
static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX + 1];
	/* Stays NULL until the reply is allocated, so the early error paths
	 * pass NULL to kfree_skb().
	 */
	struct sk_buff *skb = NULL;
	struct mfc_cache *cache;
	struct mr_table *mrt;
	__be32 src, grp;
	u32 tableid;
	int err;

	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;

	mrt = ipmr_get_table(net, tableid ?
tableid : RT_TABLE_DEFAULT);
	if (!mrt) {
		err = -ENOENT;
		goto errout_free;
	}

	/* entries are added/deleted only under RTNL */
	rcu_read_lock();
	cache = ipmr_cache_find(mrt, src, grp);
	rcu_read_unlock();
	if (!cache) {
		err = -ENOENT;
		goto errout_free;
	}

	/* The reply is sized for a resolved entry (unresolved == false). */
	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout_free;
	}

	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
			       nlh->nlmsg_seq, cache,
			       RTM_NEWROUTE, 0);
	if (err < 0)
		goto errout_free;

	/* skb is not freed on this path; presumably rtnl_unicast() takes
	 * ownership of it - confirm.
	 */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout:
	return err;

errout_free:
	/* skb is still NULL when the failure happened before the allocation. */
	kfree_skb(skb);
	goto errout;
}

/* Dump callback registered for RTM_GETROUTE in ip_mr_init().
 *
 * With cb->strict_check set, the request is first validated by
 * ip_valid_fib_dump_req(), which also fills @filter.  If the filter names a
 * table, only that table is dumped; otherwise mr_rtm_dumproute() is used
 * with ipmr_mr_table_iter.
 *
 * Returns a negative errno on failure; on the single-table path the return
 * value is skb->len when that is non-zero and mr_table_dump()'s result
 * otherwise.
 */
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct fib_dump_filter filter = {};
	int err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
					    &filter, cb);
		if (err < 0)
			return err;
	}

	if (filter.table_id) {
		struct mr_table *mrt;

		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
		if (!mrt) {
			/* A missing table is not an error when the dump
			 * covers all families; return what skb holds so far.
			 */
			if (filter.dump_all_families)
				return skb->len;

			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
			return -ENOENT;
		}
		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
				    &mfc_unres_lock, &filter);
		/* GNU "?:" shorthand: skb->len if non-zero, else err. */
		return skb->len ? : err;
	}

	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
}

/* Attribute policy passed to nlmsg_validate() in rtm_to_ipmr_mfcc(). */
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
	[RTA_SRC]	= { .type = NLA_U32 },
	[RTA_DST]	= { .type = NLA_U32 },
	[RTA_IIF]	= { .type = NLA_U32 },
	[RTA_TABLE]	= { .type = NLA_U32 },
	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
};

/* Returns true only for RTPROT_STATIC and RTPROT_MROUTED. */
static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
{
	switch (rtm_protocol) {
	case RTPROT_STATIC:
	case RTPROT_MROUTED:
		return true;
	}
	return false;
}

/* Copy rtnh_hops of each struct rtnexthop found in @nla into
 * mfcc->mfcc_ttls[], in order, stopping after MAXVIFS entries.
 *
 * Returns the number of entries stored, or -EINVAL when bytes are still
 * unconsumed once the walk stops.
 *
 * NOTE(review): the MAXVIFS break happens before rtnh_next() consumes the
 * last entry, so 'remaining' presumably stays > 0 and filling all MAXVIFS
 * slots ends in -EINVAL - confirm this is intended.
 */
static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
{
	struct rtnexthop *rtnh = nla_data(nla);
	int remaining = nla_len(nla), vifi = 0;

	while (rtnh_ok(rtnh, remaining)) {
		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
		if (++vifi == MAXVIFS)
			break;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return remaining > 0 ?
-EINVAL : vifi;
}

/* Translate a route add/delete netlink message into a struct mfcctl.
 *
 * The message is validated against rtm_ipmr_policy, and its header must be
 * RTNL_FAMILY_IPMR / RTN_MULTICAST / RT_SCOPE_UNIVERSE with rtm_dst_len 32
 * and a protocol accepted by ipmr_rtm_validate_proto().  @mfcc is zeroed,
 * then filled from RTA_SRC, RTA_DST and RTA_MULTIPATH; RTA_IIF is resolved
 * to a device and, through ipmr_find_vif(), to mfcc_parent.
 *
 * On success *@mrtret is the table selected by RTA_TABLE (RT_TABLE_DEFAULT
 * when absent) and *@mrtsock is 1 for RTPROT_MROUTED, 0 otherwise.  Neither
 * is written on failure.
 *
 * returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY
 */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
			    struct mfcctl *mfcc, int *mrtsock,
			    struct mr_table **mrtret,
			    struct netlink_ext_ack *extack)
{
	struct net_device *dev = NULL;
	u32 tblid = RT_TABLE_DEFAULT;
	struct mr_table *mrt;
	struct nlattr *attr;
	struct rtmsg *rtm;
	int ret, rem;

	ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
			     extack);
	if (ret < 0)
		goto out;
	rtm = nlmsg_data(nlh);

	ret = -EINVAL;
	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
	    rtm->rtm_type != RTN_MULTICAST ||
	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
		goto out;

	memset(mfcc, 0, sizeof(*mfcc));
	/* Parent stays -1 unless RTA_IIF names a device (see below). */
	mfcc->mfcc_parent = -1;
	ret = 0;
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
		switch (nla_type(attr)) {
		case RTA_SRC:
			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
			break;
		case RTA_DST:
			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
			break;
		case RTA_IIF:
			dev = __dev_get_by_index(net, nla_get_u32(attr));
			if (!dev) {
				ret = -ENODEV;
				goto out;
			}
			break;
		case RTA_MULTIPATH:
			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case RTA_PREFSRC:
			/* The mere presence of RTA_PREFSRC selects the
			 * "1" (ADD_MFC_PROXY) return value; its payload is
			 * not read.
			 */
			ret = 1;
			break;
		case RTA_TABLE:
			tblid = nla_get_u32(attr);
			break;
		}
	}
	mrt = ipmr_get_table(net, tblid);
	if (!mrt) {
		ret = -ENOENT;
		goto out;
	}
	*mrtret = mrt;
	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
	/* The vif lookup needs the table, hence it is done only now. */
	if (dev)
		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);

out:
	return ret;
}

/* takes care of both newroute and delroute
 *
 * Registered for RTM_NEWROUTE and RTM_DELROUTE in ip_mr_init().  The
 * request is decoded by rtm_to_ipmr_mfcc(); the parent passed on is
 * mfcc.mfcc_parent only when that call returned non-zero, and -1 otherwise.
 * Returns the decode error, or the result of ipmr_mfc_add() /
 * ipmr_mfc_delete().
 */
static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	int ret, mrtsock, parent;
	struct mr_table *tbl;
	struct mfcctl mfcc;

	mrtsock = 0;
	tbl = NULL;
	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
	if (ret < 0)
		return ret;

	parent = ret ? mfcc.mfcc_parent : -1;
	if (nlh->nlmsg_type == RTM_NEWROUTE)
		return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
	else
		return ipmr_mfc_delete(tbl, &mfcc, parent);
}

/* Add the per-table attributes of @mrt to @skb: table id, length of the
 * unresolved queue, register vif number and the do_assert / do_pim /
 * do_wrvifwhole settings.
 *
 * Returns false as soon as one attribute cannot be added, true otherwise.
 * Attributes added before a failure are left in @skb for the caller to
 * discard.
 */
static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
{
	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);

	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
	    nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
			mrt->mroute_reg_vif_num) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
		       mrt->mroute_do_assert) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
		       mrt->mroute_do_wrvifwhole))
		return false;

	return true;
}

/* Add one IPMRA_VIF nest describing vif @vifid of @mrt to @skb.
 *
 * Returns true when the nest was added or when the vif does not exist
 * (nothing is emitted then); false when @skb ran out of room, in which case
 * the partially written nest is cancelled.
 */
static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
{
	struct nlattr *vif_nest;
	struct vif_device *vif;

	/* if the VIF doesn't exist just continue */
	if (!VIF_EXISTS(mrt, vifid))
		return true;

	vif = &mrt->vif_table[vifid];
	vif_nest = nla_nest_start(skb, IPMRA_VIF);
	if (!vif_nest)
		return false;
	/* vif->dev is dereferenced unchecked; presumably VIF_EXISTS() above
	 * implies it is non-NULL - confirm.
	 */
	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
	    nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
	    nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
			      IPMRA_VIFA_PAD) ||
	    nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
	    nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
		/* Drop the half-written nest so the message stays valid. */
		nla_nest_cancel(skb, vif_nest);
		return false;
	}
	nla_nest_end(skb, vif_nest);

	return true;
}

/* Strict-mode validation of a link dump request header.
 *
 * The message must hold at least a struct ifinfomsg, carry no attribute
 * data after it, and have __ifi_pad, ifi_type, ifi_flags, ifi_change and
 * ifi_index all zero.  Returns 0 when valid, -EINVAL (with a message in
 * @extack) otherwise.
 */
static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);
	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
	    ifm->ifi_change || ifm->ifi_index) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
		return -EINVAL;
	}

	return 0;
}

/* Dump callback registered for RTM_GETLINK in ip_mr_init().
 *
 * Emits one RTM_NEWLINK message per table: an IFLA_AF_SPEC nest holding the
 * table attributes and an IPMRA_TABLE_VIFS nest with one entry per existing
 * vif.
 *
 * The dump is resumable: cb->args[0] is the index of the table to continue
 * from and cb->args[1] the vif index within it.  Returns skb->len, or a
 * negative errno if strict validation of the request fails.
 */
static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlmsghdr *nlh = NULL;
	/* t/e: current table/vif index; s_t/s_e: where to resume from. */
	unsigned int t = 0, s_t;
	unsigned int e = 0, s_e;
	struct mr_table *mrt;

	if (cb->strict_check) {
		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);

		if (err < 0)
			return err;
	}

	s_t = cb->args[0];
	s_e = cb->args[1];

	ipmr_for_each_table(mrt, net) {
		struct nlattr *vifs, *af;
		struct ifinfomsg *hdr;
		u32 i;

		/* Tables fully dumped by an earlier call are skipped. */
		if (t < s_t)
			goto skip_table;
		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, RTM_NEWLINK,
				sizeof(*hdr), NLM_F_MULTI);
		if (!nlh)
			break;

		hdr = nlmsg_data(nlh);
		memset(hdr, 0, sizeof(*hdr));
		hdr->ifi_family = RTNL_FAMILY_IPMR;

		/* Failures before any vif was written cancel the whole
		 * message for this table.
		 */
		af = nla_nest_start(skb, IFLA_AF_SPEC);
		if (!af) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}

		if (!ipmr_fill_table(mrt, skb)) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}

		/* From here on a failure closes the message instead of
		 * cancelling it, so what was written so far is kept.
		 */
		vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
		if (!vifs) {
			nla_nest_end(skb, af);
			nlmsg_end(skb, nlh);
			goto out;
		}
		for (i = 0; i < mrt->maxvif; i++) {
			if (e < s_e)
				goto skip_entry;
			if (!ipmr_fill_vif(mrt, i, skb)) {
				/* Out of room: e is left at the vif that did
				 * not fit and is saved at 'out' as the resume
				 * point.
				 */
				nla_nest_end(skb, vifs);
				nla_nest_end(skb, af);
				nlmsg_end(skb, nlh);
				goto out;
			}
skip_entry:
			e++;
		}
		/* The vif resume offset only applies to the first table
		 * handled by this call.
		 */
		s_e = 0;
		e = 0;
		nla_nest_end(skb, vifs);
		nla_nest_end(skb, af);
		nlmsg_end(skb, nlh);
skip_table:
		t++;
	}

out:
	cb->args[1] = e;
	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing :
 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
 */

/* seq_file ->start for ip_mr_vif: binds the iterator to the
 * RT_TABLE_DEFAULT table and takes mrt_lock for reading before delegating
 * to mr_vif_seq_start().
 *
 * NOTE(review): the ERR_PTR(-ENOENT) return happens before mrt_lock is
 * taken, while ipmr_vif_seq_stop() unlocks unconditionally - confirm
 * whether ->stop can run after a failed ->start.
 */
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

/* seq_file ->stop for ip_mr_vif: releases the read lock on mrt_lock taken
 * in ipmr_vif_seq_start().
 */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

/* seq_file ->show for ip_mr_vif: prints the column header for
 * SEQ_START_TOKEN, otherwise one line for the vif_device passed in @v.
 * Always returns 0.
 */
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		/* A vif without a device is shown as "none". */
		const char *name = vif->dev ?
				   vif->dev->name : "none";

		/* The first column is the entry's index within
		 * mrt->vif_table.
		 */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

/* Iterator for the "ip_mr_vif" proc file created in ipmr_net_init(). */
static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

/* seq_file ->start for ip_mr_cache: looks up the RT_TABLE_DEFAULT table and
 * delegates to mr_mfc_seq_start(), passing mfc_unres_lock.  Returns
 * ERR_PTR(-ENOENT) when the table does not exist.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* seq_file ->show for ip_mr_cache: prints the column header for
 * SEQ_START_TOKEN, otherwise one line for the mfc_cache passed in @v:
 * group, origin and parent vif, followed by the packet/byte/wrong-if
 * counters and the per-vif ttl list.  Always returns 0.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->_c.mfc_parent);

		/* it->cache tells which list is being walked; anything but
		 * the unresolved queue gets real counters and ttls.
		 */
		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			/* List only vifs that exist and whose ttl entry is
			 * below 255.
			 */
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

/* Iterator for the "ip_mr_cache" proc file created in ipmr_net_init(). */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
/* Protocol descriptor registered for IPPROTO_PIM in ip_mr_init(). */
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif

/* fib_seq_read callback: sum of the namespace's ipmr_seq counter and
 * ipmr_rules_seq_read().  Asserts that RTNL is held.
 */
static unsigned int ipmr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
}

/* fib_dump callback: delegates to mr_dump() for the IPMR family, passing
 * ipmr_rules_dump, ipmr_mr_table_iter and mrt_lock.
 */
static int ipmr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
		       ipmr_mr_table_iter, &mrt_lock);
}

/* Template registered per namespace by ipmr_notifier_init(). */
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.fib_seq_read	= ipmr_seq_read,
	.fib_dump	= ipmr_dump,
	.owner		= THIS_MODULE,
};

/* Reset the namespace's ipmr_seq counter and register its FIB notifier
 * ops, storing the result in net->ipv4.ipmr_notifier_ops.  Returns 0 or the
 * error encoded in the pointer returned by fib_notifier_ops_register().
 */
static int __net_init ipmr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv4.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	net->ipv4.ipmr_notifier_ops = ops;

	return 0;
}

/* Unregister the namespace's FIB notifier ops and clear the pointer. */
static void __net_exit ipmr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
	net->ipv4.ipmr_notifier_ops = NULL;
}

/* Setup for IP multicast routing
 *
 * Per-namespace init: notifier ops first, then the rules, then (with
 * CONFIG_PROC_FS) the "ip_mr_vif" and "ip_mr_cache" proc files.  Returns 0
 * or a negative errno after undoing the steps already completed.
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_notifier_init(net);
	if (err)
		goto ipmr_notifier_fail;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto ipmr_rules_fail;

#ifdef CONFIG_PROC_FS
	/* Error code used if either proc entry cannot be created. */
	err = -ENOMEM;
	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif
	return 0;

	/* Unwind in reverse order of setup; each label undoes the steps that
	 * had completed before the failure.
	 */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
ipmr_rules_fail:
	ipmr_notifier_exit(net);
ipmr_notifier_fail:
	return err;
}

/* Per-namespace teardown: removes both proc entries (with CONFIG_PROC_FS),
 * then the notifier ops, then the rules.  Note that the notifier is torn
 * down before the rules here, unlike the error path of ipmr_net_init().
 */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_mr_cache", net->proc_net);
	remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
	ipmr_notifier_exit(net);
	ipmr_rules_exit(net);
}

/* Per-namespace hooks registered by ip_mr_init(). */
static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

/* One-time initialisation of IPv4 multicast routing.
 *
 * Creates the mfc_cache slab, registers the per-namespace operations and
 * the netdevice notifier, adds the PIM protocol handler (with
 * CONFIG_IP_PIMSM_V2) and registers the rtnetlink handlers for
 * RTM_GETROUTE, RTM_NEWROUTE, RTM_DELROUTE and RTM_GETLINK.
 *
 * Returns 0 on success or a negative errno after undoing the steps already
 * completed.
 */
int __init ip_mr_init(void)
{
	int err;

	/* The result is not NULL-checked; SLAB_PANIC is passed, so
	 * presumably a failed creation panics instead of returning NULL.
	 */
	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	/* Any failure to add the protocol is reported as -EAGAIN. */
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	/* NOTE(review): the return values of rtnl_register() are ignored. */
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
		      ipmr_rtm_route, NULL, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
		      ipmr_rtm_route, NULL, 0);

	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
		      NULL, ipmr_rtm_dumplink, 0);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}