/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
#include <net/switchdev.h>

#include <linux/nospec.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
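 * Hence the forwarding fast path runs under RCU or read_lock(&mrt_lock)
 * only; exclusive locking is confined to configuration changes made
 * under RTNL.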
99 */ 100 101 static struct kmem_cache *mrt_cachep __ro_after_init; 102 103 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 104 static void ipmr_free_table(struct mr_table *mrt); 105 106 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 107 struct net_device *dev, struct sk_buff *skb, 108 struct mfc_cache *cache, int local); 109 static int ipmr_cache_report(struct mr_table *mrt, 110 struct sk_buff *pkt, vifi_t vifi, int assert); 111 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 112 int cmd); 113 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 114 static void mroute_clean_tables(struct mr_table *mrt, bool all); 115 static void ipmr_expire_process(struct timer_list *t); 116 117 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 118 #define ipmr_for_each_table(mrt, net) \ 119 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 120 121 static struct mr_table *ipmr_mr_table_iter(struct net *net, 122 struct mr_table *mrt) 123 { 124 struct mr_table *ret; 125 126 if (!mrt) 127 ret = list_entry_rcu(net->ipv4.mr_tables.next, 128 struct mr_table, list); 129 else 130 ret = list_entry_rcu(mrt->list.next, 131 struct mr_table, list); 132 133 if (&ret->list == &net->ipv4.mr_tables) 134 return NULL; 135 return ret; 136 } 137 138 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 139 { 140 struct mr_table *mrt; 141 142 ipmr_for_each_table(mrt, net) { 143 if (mrt->id == id) 144 return mrt; 145 } 146 return NULL; 147 } 148 149 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 150 struct mr_table **mrt) 151 { 152 int err; 153 struct ipmr_result res; 154 struct fib_lookup_arg arg = { 155 .result = &res, 156 .flags = FIB_LOOKUP_NOREF, 157 }; 158 159 /* update flow if oif or iif point to device enslaved to l3mdev */ 160 l3mdev_update_flow(net, flowi4_to_flowi(flp4)); 161 162 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 163 flowi4_to_flowi(flp4), 0, &arg); 164 if (err < 0) 165 return err; 166 *mrt = res.mrt; 167 return 0; 168 } 169 170 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 171 int flags, struct fib_lookup_arg *arg) 172 { 173 struct ipmr_result *res = arg->result; 174 struct mr_table *mrt; 175 176 switch (rule->action) { 177 case FR_ACT_TO_TBL: 178 break; 179 case FR_ACT_UNREACHABLE: 180 return -ENETUNREACH; 181 case FR_ACT_PROHIBIT: 182 return -EACCES; 183 case FR_ACT_BLACKHOLE: 184 default: 185 return -EINVAL; 186 } 187 188 arg->table = fib_rule_get_table(rule, arg); 189 190 mrt = ipmr_get_table(rule->fr_net, arg->table); 191 if (!mrt) 192 return -EAGAIN; 193 res->mrt = mrt; 194 return 0; 195 } 196 197 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 198 { 199 return 1; 200 } 201 202 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 203 FRA_GENERIC_POLICY, 204 }; 205 206 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 207 struct fib_rule_hdr *frh, struct nlattr **tb, 208 struct netlink_ext_ack *extack) 209 { 210 return 0; 211 } 212 213 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 214 struct nlattr **tb) 215 { 216 return 1; 217 } 218 219 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 220 struct fib_rule_hdr *frh) 221 { 222 frh->dst_len = 0; 223 frh->src_len = 0; 224 frh->tos = 0; 225 return 0; 226 } 227 228 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { 229 .family = RTNL_FAMILY_IPMR, 230 .rule_size = 
sizeof(struct ipmr_rule), 231 .addr_size = sizeof(u32), 232 .action = ipmr_rule_action, 233 .match = ipmr_rule_match, 234 .configure = ipmr_rule_configure, 235 .compare = ipmr_rule_compare, 236 .fill = ipmr_rule_fill, 237 .nlgroup = RTNLGRP_IPV4_RULE, 238 .policy = ipmr_rule_policy, 239 .owner = THIS_MODULE, 240 }; 241 242 static int __net_init ipmr_rules_init(struct net *net) 243 { 244 struct fib_rules_ops *ops; 245 struct mr_table *mrt; 246 int err; 247 248 ops = fib_rules_register(&ipmr_rules_ops_template, net); 249 if (IS_ERR(ops)) 250 return PTR_ERR(ops); 251 252 INIT_LIST_HEAD(&net->ipv4.mr_tables); 253 254 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 255 if (IS_ERR(mrt)) { 256 err = PTR_ERR(mrt); 257 goto err1; 258 } 259 260 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 261 if (err < 0) 262 goto err2; 263 264 net->ipv4.mr_rules_ops = ops; 265 return 0; 266 267 err2: 268 ipmr_free_table(mrt); 269 err1: 270 fib_rules_unregister(ops); 271 return err; 272 } 273 274 static void __net_exit ipmr_rules_exit(struct net *net) 275 { 276 struct mr_table *mrt, *next; 277 278 rtnl_lock(); 279 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 280 list_del(&mrt->list); 281 ipmr_free_table(mrt); 282 } 283 fib_rules_unregister(net->ipv4.mr_rules_ops); 284 rtnl_unlock(); 285 } 286 287 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 288 { 289 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); 290 } 291 292 static unsigned int ipmr_rules_seq_read(struct net *net) 293 { 294 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 295 } 296 297 bool ipmr_rule_default(const struct fib_rule *rule) 298 { 299 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 300 } 301 EXPORT_SYMBOL(ipmr_rule_default); 302 #else 303 #define ipmr_for_each_table(mrt, net) \ 304 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 305 306 static struct mr_table *ipmr_mr_table_iter(struct net *net, 307 struct mr_table *mrt) 308 { 309 if (!mrt) 310 return net->ipv4.mrt; 311 return NULL; 312 } 313 314 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 315 { 316 return net->ipv4.mrt; 317 } 318 319 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 320 struct mr_table **mrt) 321 { 322 *mrt = net->ipv4.mrt; 323 return 0; 324 } 325 326 static int __net_init ipmr_rules_init(struct net *net) 327 { 328 struct mr_table *mrt; 329 330 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 331 if (IS_ERR(mrt)) 332 return PTR_ERR(mrt); 333 net->ipv4.mrt = mrt; 334 return 0; 335 } 336 337 static void __net_exit ipmr_rules_exit(struct net *net) 338 { 339 rtnl_lock(); 340 ipmr_free_table(net->ipv4.mrt); 341 net->ipv4.mrt = NULL; 342 rtnl_unlock(); 343 } 344 345 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 346 { 347 return 0; 348 } 349 350 static unsigned int ipmr_rules_seq_read(struct net *net) 351 { 352 return 0; 353 } 354 355 bool ipmr_rule_default(const struct fib_rule *rule) 356 { 357 return true; 358 } 359 EXPORT_SYMBOL(ipmr_rule_default); 360 #endif 361 362 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 363 const void *ptr) 364 { 365 const struct mfc_cache_cmp_arg *cmparg = arg->key; 366 struct mfc_cache *c = (struct mfc_cache *)ptr; 367 368 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 369 cmparg->mfc_origin != c->mfc_origin; 370 } 371 372 static const struct rhashtable_params ipmr_rht_params = { 373 .head_offset = offsetof(struct mr_mfc, mnode), 374 .key_offset = offsetof(struct mfc_cache, cmparg), 375 .key_len = 
sizeof(struct mfc_cache_cmp_arg), 376 .nelem_hint = 3, 377 .locks_mul = 1, 378 .obj_cmpfn = ipmr_hash_cmp, 379 .automatic_shrinking = true, 380 }; 381 382 static void ipmr_new_table_set(struct mr_table *mrt, 383 struct net *net) 384 { 385 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 386 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 387 #endif 388 } 389 390 static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { 391 .mfc_mcastgrp = htonl(INADDR_ANY), 392 .mfc_origin = htonl(INADDR_ANY), 393 }; 394 395 static struct mr_table_ops ipmr_mr_table_ops = { 396 .rht_params = &ipmr_rht_params, 397 .cmparg_any = &ipmr_mr_table_ops_cmparg_any, 398 }; 399 400 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 401 { 402 struct mr_table *mrt; 403 404 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 405 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 406 return ERR_PTR(-EINVAL); 407 408 mrt = ipmr_get_table(net, id); 409 if (mrt) 410 return mrt; 411 412 return mr_table_alloc(net, id, &ipmr_mr_table_ops, 413 ipmr_expire_process, ipmr_new_table_set); 414 } 415 416 static void ipmr_free_table(struct mr_table *mrt) 417 { 418 del_timer_sync(&mrt->ipmr_expire_timer); 419 mroute_clean_tables(mrt, true); 420 rhltable_destroy(&mrt->mfc_hash); 421 kfree(mrt); 422 } 423 424 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 425 426 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 427 { 428 struct net *net = dev_net(dev); 429 430 dev_close(dev); 431 432 dev = __dev_get_by_name(net, "tunl0"); 433 if (dev) { 434 const struct net_device_ops *ops = dev->netdev_ops; 435 struct ifreq ifr; 436 struct ip_tunnel_parm p; 437 438 memset(&p, 0, sizeof(p)); 439 p.iph.daddr = v->vifc_rmt_addr.s_addr; 440 p.iph.saddr = v->vifc_lcl_addr.s_addr; 441 p.iph.version = 4; 442 p.iph.ihl = 5; 443 p.iph.protocol = IPPROTO_IPIP; 444 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 445 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 446 447 if (ops->ndo_do_ioctl) { 448 mm_segment_t oldfs = get_fs(); 449 450 set_fs(KERNEL_DS); 451 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 452 set_fs(oldfs); 453 } 454 } 455 } 456 457 /* Initialize ipmr pimreg/tunnel in_device */ 458 static bool ipmr_init_vif_indev(const struct net_device *dev) 459 { 460 struct in_device *in_dev; 461 462 ASSERT_RTNL(); 463 464 in_dev = __in_dev_get_rtnl(dev); 465 if (!in_dev) 466 return false; 467 ipv4_devconf_setall(in_dev); 468 neigh_parms_data_state_setall(in_dev->arp_parms); 469 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 470 471 return true; 472 } 473 474 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 475 { 476 struct net_device *dev; 477 478 dev = __dev_get_by_name(net, "tunl0"); 479 480 if (dev) { 481 const struct net_device_ops *ops = dev->netdev_ops; 482 int err; 483 struct ifreq ifr; 484 struct ip_tunnel_parm p; 485 486 memset(&p, 0, sizeof(p)); 487 p.iph.daddr = v->vifc_rmt_addr.s_addr; 488 p.iph.saddr = v->vifc_lcl_addr.s_addr; 489 p.iph.version = 4; 490 p.iph.ihl = 5; 491 p.iph.protocol = IPPROTO_IPIP; 492 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 493 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 494 495 if (ops->ndo_do_ioctl) { 496 mm_segment_t oldfs = get_fs(); 497 498 set_fs(KERNEL_DS); 499 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 500 set_fs(oldfs); 501 } else { 502 err = -EOPNOTSUPP; 503 } 504 dev = NULL; 505 506 if (err == 0 && 507 (dev = __dev_get_by_name(net, p.name)) != NULL) { 508 dev->flags |= IFF_MULTICAST; 509 if 
(!ipmr_init_vif_indev(dev))
				goto failure;
			if (dev_open(dev, NULL))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c.
packet is not truncated 618 */ 619 if (!ipv4_is_multicast(encap->daddr) || 620 encap->tot_len == 0 || 621 ntohs(encap->tot_len) + pimlen > skb->len) 622 return 1; 623 624 read_lock(&mrt_lock); 625 if (mrt->mroute_reg_vif_num >= 0) 626 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 627 read_unlock(&mrt_lock); 628 629 if (!reg_dev) 630 return 1; 631 632 skb->mac_header = skb->network_header; 633 skb_pull(skb, (u8 *)encap - skb->data); 634 skb_reset_network_header(skb); 635 skb->protocol = htons(ETH_P_IP); 636 skb->ip_summed = CHECKSUM_NONE; 637 638 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 639 640 netif_rx(skb); 641 642 return NET_RX_SUCCESS; 643 } 644 #else 645 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 646 { 647 return NULL; 648 } 649 #endif 650 651 static int call_ipmr_vif_entry_notifiers(struct net *net, 652 enum fib_event_type event_type, 653 struct vif_device *vif, 654 vifi_t vif_index, u32 tb_id) 655 { 656 return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, 657 vif, vif_index, tb_id, 658 &net->ipv4.ipmr_seq); 659 } 660 661 static int call_ipmr_mfc_entry_notifiers(struct net *net, 662 enum fib_event_type event_type, 663 struct mfc_cache *mfc, u32 tb_id) 664 { 665 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, 666 &mfc->_c, tb_id, &net->ipv4.ipmr_seq); 667 } 668 669 /** 670 * vif_delete - Delete a VIF entry 671 * @notify: Set to 1, if the caller is a notifier_call 672 */ 673 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 674 struct list_head *head) 675 { 676 struct net *net = read_pnet(&mrt->net); 677 struct vif_device *v; 678 struct net_device *dev; 679 struct in_device *in_dev; 680 681 if (vifi < 0 || vifi >= mrt->maxvif) 682 return -EADDRNOTAVAIL; 683 684 v = &mrt->vif_table[vifi]; 685 686 if (VIF_EXISTS(mrt, vifi)) 687 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, 688 mrt->id); 689 690 write_lock_bh(&mrt_lock); 691 dev = v->dev; 692 v->dev = NULL; 693 694 if (!dev) { 695 write_unlock_bh(&mrt_lock); 696 return -EADDRNOTAVAIL; 697 } 698 699 if (vifi == mrt->mroute_reg_vif_num) 700 mrt->mroute_reg_vif_num = -1; 701 702 if (vifi + 1 == mrt->maxvif) { 703 int tmp; 704 705 for (tmp = vifi - 1; tmp >= 0; tmp--) { 706 if (VIF_EXISTS(mrt, tmp)) 707 break; 708 } 709 mrt->maxvif = tmp+1; 710 } 711 712 write_unlock_bh(&mrt_lock); 713 714 dev_set_allmulti(dev, -1); 715 716 in_dev = __in_dev_get_rtnl(dev); 717 if (in_dev) { 718 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 719 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 720 NETCONFA_MC_FORWARDING, 721 dev->ifindex, &in_dev->cnf); 722 ip_rt_multicast_event(in_dev); 723 } 724 725 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 726 unregister_netdevice_queue(dev, head); 727 728 dev_put(dev); 729 return 0; 730 } 731 732 static void ipmr_cache_free_rcu(struct rcu_head *head) 733 { 734 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 735 736 kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); 737 } 738 739 static void ipmr_cache_free(struct mfc_cache *c) 740 { 741 call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); 742 } 743 744 /* Destroy an unresolved cache entry, killing queued skbs 745 * and reporting error to netlink readers. 
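 * Queued netlink queries are answered with -ETIMEDOUT; ordinary data
 * skbs are simply freed.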
746 */ 747 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 748 { 749 struct net *net = read_pnet(&mrt->net); 750 struct sk_buff *skb; 751 struct nlmsgerr *e; 752 753 atomic_dec(&mrt->cache_resolve_queue_len); 754 755 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { 756 if (ip_hdr(skb)->version == 0) { 757 struct nlmsghdr *nlh = skb_pull(skb, 758 sizeof(struct iphdr)); 759 nlh->nlmsg_type = NLMSG_ERROR; 760 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 761 skb_trim(skb, nlh->nlmsg_len); 762 e = nlmsg_data(nlh); 763 e->error = -ETIMEDOUT; 764 memset(&e->msg, 0, sizeof(e->msg)); 765 766 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 767 } else { 768 kfree_skb(skb); 769 } 770 } 771 772 ipmr_cache_free(c); 773 } 774 775 /* Timer process for the unresolved queue. */ 776 static void ipmr_expire_process(struct timer_list *t) 777 { 778 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 779 struct mr_mfc *c, *next; 780 unsigned long expires; 781 unsigned long now; 782 783 if (!spin_trylock(&mfc_unres_lock)) { 784 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 785 return; 786 } 787 788 if (list_empty(&mrt->mfc_unres_queue)) 789 goto out; 790 791 now = jiffies; 792 expires = 10*HZ; 793 794 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 795 if (time_after(c->mfc_un.unres.expires, now)) { 796 unsigned long interval = c->mfc_un.unres.expires - now; 797 if (interval < expires) 798 expires = interval; 799 continue; 800 } 801 802 list_del(&c->list); 803 mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); 804 ipmr_destroy_unres(mrt, (struct mfc_cache *)c); 805 } 806 807 if (!list_empty(&mrt->mfc_unres_queue)) 808 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 809 810 out: 811 spin_unlock(&mfc_unres_lock); 812 } 813 814 /* Fill oifs list. It is called under write locked mrt_lock. */ 815 static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, 816 unsigned char *ttls) 817 { 818 int vifi; 819 820 cache->mfc_un.res.minvif = MAXVIFS; 821 cache->mfc_un.res.maxvif = 0; 822 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 823 824 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 825 if (VIF_EXISTS(mrt, vifi) && 826 ttls[vifi] && ttls[vifi] < 255) { 827 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 828 if (cache->mfc_un.res.minvif > vifi) 829 cache->mfc_un.res.minvif = vifi; 830 if (cache->mfc_un.res.maxvif <= vifi) 831 cache->mfc_un.res.maxvif = vifi + 1; 832 } 833 } 834 cache->mfc_un.res.lastuse = jiffies; 835 } 836 837 static int vif_add(struct net *net, struct mr_table *mrt, 838 struct vifctl *vifc, int mrtsock) 839 { 840 int vifi = vifc->vifc_vifi; 841 struct switchdev_attr attr = { 842 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 843 }; 844 struct vif_device *v = &mrt->vif_table[vifi]; 845 struct net_device *dev; 846 struct in_device *in_dev; 847 int err; 848 849 /* Is vif busy ? 
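	 * (the slot must be freed with MRT_DEL_VIF before the index can be
	 * reused)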
*/ 850 if (VIF_EXISTS(mrt, vifi)) 851 return -EADDRINUSE; 852 853 switch (vifc->vifc_flags) { 854 case VIFF_REGISTER: 855 if (!ipmr_pimsm_enabled()) 856 return -EINVAL; 857 /* Special Purpose VIF in PIM 858 * All the packets will be sent to the daemon 859 */ 860 if (mrt->mroute_reg_vif_num >= 0) 861 return -EADDRINUSE; 862 dev = ipmr_reg_vif(net, mrt); 863 if (!dev) 864 return -ENOBUFS; 865 err = dev_set_allmulti(dev, 1); 866 if (err) { 867 unregister_netdevice(dev); 868 dev_put(dev); 869 return err; 870 } 871 break; 872 case VIFF_TUNNEL: 873 dev = ipmr_new_tunnel(net, vifc); 874 if (!dev) 875 return -ENOBUFS; 876 err = dev_set_allmulti(dev, 1); 877 if (err) { 878 ipmr_del_tunnel(dev, vifc); 879 dev_put(dev); 880 return err; 881 } 882 break; 883 case VIFF_USE_IFINDEX: 884 case 0: 885 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 886 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 887 if (dev && !__in_dev_get_rtnl(dev)) { 888 dev_put(dev); 889 return -EADDRNOTAVAIL; 890 } 891 } else { 892 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 893 } 894 if (!dev) 895 return -EADDRNOTAVAIL; 896 err = dev_set_allmulti(dev, 1); 897 if (err) { 898 dev_put(dev); 899 return err; 900 } 901 break; 902 default: 903 return -EINVAL; 904 } 905 906 in_dev = __in_dev_get_rtnl(dev); 907 if (!in_dev) { 908 dev_put(dev); 909 return -EADDRNOTAVAIL; 910 } 911 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 912 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, 913 dev->ifindex, &in_dev->cnf); 914 ip_rt_multicast_event(in_dev); 915 916 /* Fill in the VIF structures */ 917 vif_device_init(v, dev, vifc->vifc_rate_limit, 918 vifc->vifc_threshold, 919 vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), 920 (VIFF_TUNNEL | VIFF_REGISTER)); 921 922 attr.orig_dev = dev; 923 if (!switchdev_port_attr_get(dev, &attr)) { 924 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); 925 v->dev_parent_id.id_len = attr.u.ppid.id_len; 926 } else { 927 v->dev_parent_id.id_len = 0; 928 } 929 930 v->local = vifc->vifc_lcl_addr.s_addr; 931 v->remote = vifc->vifc_rmt_addr.s_addr; 932 933 /* And finish update writing critical data */ 934 write_lock_bh(&mrt_lock); 935 v->dev = dev; 936 if (v->flags & VIFF_REGISTER) 937 mrt->mroute_reg_vif_num = vifi; 938 if (vifi+1 > mrt->maxvif) 939 mrt->maxvif = vifi+1; 940 write_unlock_bh(&mrt_lock); 941 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); 942 return 0; 943 } 944 945 /* called with rcu_read_lock() */ 946 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 947 __be32 origin, 948 __be32 mcastgrp) 949 { 950 struct mfc_cache_cmp_arg arg = { 951 .mfc_mcastgrp = mcastgrp, 952 .mfc_origin = origin 953 }; 954 955 return mr_mfc_find(mrt, &arg); 956 } 957 958 /* Look for a (*,G) entry */ 959 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 960 __be32 mcastgrp, int vifi) 961 { 962 struct mfc_cache_cmp_arg arg = { 963 .mfc_mcastgrp = mcastgrp, 964 .mfc_origin = htonl(INADDR_ANY) 965 }; 966 967 if (mcastgrp == htonl(INADDR_ANY)) 968 return mr_mfc_find_any_parent(mrt, vifi); 969 return mr_mfc_find_any(mrt, vifi, &arg); 970 } 971 972 /* Look for a (S,G,iif) entry if parent != -1 */ 973 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, 974 __be32 origin, __be32 mcastgrp, 975 int parent) 976 { 977 struct mfc_cache_cmp_arg arg = { 978 .mfc_mcastgrp = mcastgrp, 979 .mfc_origin = origin, 980 }; 981 982 return mr_mfc_find_parent(mrt, &arg, parent); 983 } 984 985 /* Allocate a multicast cache entry */ 986 static 
struct mfc_cache *ipmr_cache_alloc(void) 987 { 988 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 989 990 if (c) { 991 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 992 c->_c.mfc_un.res.minvif = MAXVIFS; 993 c->_c.free = ipmr_cache_free_rcu; 994 refcount_set(&c->_c.mfc_un.res.refcount, 1); 995 } 996 return c; 997 } 998 999 static struct mfc_cache *ipmr_cache_alloc_unres(void) 1000 { 1001 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1002 1003 if (c) { 1004 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 1005 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 1006 } 1007 return c; 1008 } 1009 1010 /* A cache entry has gone into a resolved state from queued */ 1011 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 1012 struct mfc_cache *uc, struct mfc_cache *c) 1013 { 1014 struct sk_buff *skb; 1015 struct nlmsgerr *e; 1016 1017 /* Play the pending entries through our router */ 1018 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1019 if (ip_hdr(skb)->version == 0) { 1020 struct nlmsghdr *nlh = skb_pull(skb, 1021 sizeof(struct iphdr)); 1022 1023 if (mr_fill_mroute(mrt, skb, &c->_c, 1024 nlmsg_data(nlh)) > 0) { 1025 nlh->nlmsg_len = skb_tail_pointer(skb) - 1026 (u8 *)nlh; 1027 } else { 1028 nlh->nlmsg_type = NLMSG_ERROR; 1029 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1030 skb_trim(skb, nlh->nlmsg_len); 1031 e = nlmsg_data(nlh); 1032 e->error = -EMSGSIZE; 1033 memset(&e->msg, 0, sizeof(e->msg)); 1034 } 1035 1036 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1037 } else { 1038 ip_mr_forward(net, mrt, skb->dev, skb, c, 0); 1039 } 1040 } 1041 } 1042 1043 /* Bounce a cache query up to mrouted and netlink. 1044 * 1045 * Called under mrt_lock. 1046 */ 1047 static int ipmr_cache_report(struct mr_table *mrt, 1048 struct sk_buff *pkt, vifi_t vifi, int assert) 1049 { 1050 const int ihl = ip_hdrlen(pkt); 1051 struct sock *mroute_sk; 1052 struct igmphdr *igmp; 1053 struct igmpmsg *msg; 1054 struct sk_buff *skb; 1055 int ret; 1056 1057 if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) 1058 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 1059 else 1060 skb = alloc_skb(128, GFP_ATOMIC); 1061 1062 if (!skb) 1063 return -ENOBUFS; 1064 1065 if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) { 1066 /* Ugly, but we have no choice with this interface. 1067 * Duplicate old header, fix ihl, length etc. 
1068 * And all this only to mangle msg->im_msgtype and 1069 * to set msg->im_mbz to "mbz" :-) 1070 */ 1071 skb_push(skb, sizeof(struct iphdr)); 1072 skb_reset_network_header(skb); 1073 skb_reset_transport_header(skb); 1074 msg = (struct igmpmsg *)skb_network_header(skb); 1075 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 1076 msg->im_msgtype = assert; 1077 msg->im_mbz = 0; 1078 if (assert == IGMPMSG_WRVIFWHOLE) 1079 msg->im_vif = vifi; 1080 else 1081 msg->im_vif = mrt->mroute_reg_vif_num; 1082 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 1083 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 1084 sizeof(struct iphdr)); 1085 } else { 1086 /* Copy the IP header */ 1087 skb_set_network_header(skb, skb->len); 1088 skb_put(skb, ihl); 1089 skb_copy_to_linear_data(skb, pkt->data, ihl); 1090 /* Flag to the kernel this is a route add */ 1091 ip_hdr(skb)->protocol = 0; 1092 msg = (struct igmpmsg *)skb_network_header(skb); 1093 msg->im_vif = vifi; 1094 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1095 /* Add our header */ 1096 igmp = skb_put(skb, sizeof(struct igmphdr)); 1097 igmp->type = assert; 1098 msg->im_msgtype = assert; 1099 igmp->code = 0; 1100 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 1101 skb->transport_header = skb->network_header; 1102 } 1103 1104 rcu_read_lock(); 1105 mroute_sk = rcu_dereference(mrt->mroute_sk); 1106 if (!mroute_sk) { 1107 rcu_read_unlock(); 1108 kfree_skb(skb); 1109 return -EINVAL; 1110 } 1111 1112 igmpmsg_netlink_event(mrt, skb); 1113 1114 /* Deliver to mrouted */ 1115 ret = sock_queue_rcv_skb(mroute_sk, skb); 1116 rcu_read_unlock(); 1117 if (ret < 0) { 1118 net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); 1119 kfree_skb(skb); 1120 } 1121 1122 return ret; 1123 } 1124 1125 /* Queue a packet for resolution. It gets locked cache entry! */ 1126 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, 1127 struct sk_buff *skb, struct net_device *dev) 1128 { 1129 const struct iphdr *iph = ip_hdr(skb); 1130 struct mfc_cache *c; 1131 bool found = false; 1132 int err; 1133 1134 spin_lock_bh(&mfc_unres_lock); 1135 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1136 if (c->mfc_mcastgrp == iph->daddr && 1137 c->mfc_origin == iph->saddr) { 1138 found = true; 1139 break; 1140 } 1141 } 1142 1143 if (!found) { 1144 /* Create a new entry if allowable */ 1145 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1146 (c = ipmr_cache_alloc_unres()) == NULL) { 1147 spin_unlock_bh(&mfc_unres_lock); 1148 1149 kfree_skb(skb); 1150 return -ENOBUFS; 1151 } 1152 1153 /* Fill in the new cache entry */ 1154 c->_c.mfc_parent = -1; 1155 c->mfc_origin = iph->saddr; 1156 c->mfc_mcastgrp = iph->daddr; 1157 1158 /* Reflect first query at mrouted. 
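		 * mrouted is expected to answer with MRT_ADD_MFC, at which
		 * point ipmr_cache_resolve() replays the queued skbs.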
*/ 1159 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1160 1161 if (err < 0) { 1162 /* If the report failed throw the cache entry 1163 out - Brad Parker 1164 */ 1165 spin_unlock_bh(&mfc_unres_lock); 1166 1167 ipmr_cache_free(c); 1168 kfree_skb(skb); 1169 return err; 1170 } 1171 1172 atomic_inc(&mrt->cache_resolve_queue_len); 1173 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1174 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1175 1176 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1177 mod_timer(&mrt->ipmr_expire_timer, 1178 c->_c.mfc_un.unres.expires); 1179 } 1180 1181 /* See if we can append the packet */ 1182 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1183 kfree_skb(skb); 1184 err = -ENOBUFS; 1185 } else { 1186 if (dev) { 1187 skb->dev = dev; 1188 skb->skb_iif = dev->ifindex; 1189 } 1190 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1191 err = 0; 1192 } 1193 1194 spin_unlock_bh(&mfc_unres_lock); 1195 return err; 1196 } 1197 1198 /* MFC cache manipulation by user space mroute daemon */ 1199 1200 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1201 { 1202 struct net *net = read_pnet(&mrt->net); 1203 struct mfc_cache *c; 1204 1205 /* The entries are added/deleted only under RTNL */ 1206 rcu_read_lock(); 1207 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1208 mfc->mfcc_mcastgrp.s_addr, parent); 1209 rcu_read_unlock(); 1210 if (!c) 1211 return -ENOENT; 1212 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); 1213 list_del_rcu(&c->_c.list); 1214 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); 1215 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1216 mr_cache_put(&c->_c); 1217 1218 return 0; 1219 } 1220 1221 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1222 struct mfcctl *mfc, int mrtsock, int parent) 1223 { 1224 struct mfc_cache *uc, *c; 1225 struct mr_mfc *_uc; 1226 bool found; 1227 int ret; 1228 1229 if (mfc->mfcc_parent >= MAXVIFS) 1230 return -ENFILE; 1231 1232 /* The entries are added/deleted only under RTNL */ 1233 rcu_read_lock(); 1234 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1235 mfc->mfcc_mcastgrp.s_addr, parent); 1236 rcu_read_unlock(); 1237 if (c) { 1238 write_lock_bh(&mrt_lock); 1239 c->_c.mfc_parent = mfc->mfcc_parent; 1240 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1241 if (!mrtsock) 1242 c->_c.mfc_flags |= MFC_STATIC; 1243 write_unlock_bh(&mrt_lock); 1244 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, 1245 mrt->id); 1246 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1247 return 0; 1248 } 1249 1250 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1251 !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1252 return -EINVAL; 1253 1254 c = ipmr_cache_alloc(); 1255 if (!c) 1256 return -ENOMEM; 1257 1258 c->mfc_origin = mfc->mfcc_origin.s_addr; 1259 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1260 c->_c.mfc_parent = mfc->mfcc_parent; 1261 ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); 1262 if (!mrtsock) 1263 c->_c.mfc_flags |= MFC_STATIC; 1264 1265 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1266 ipmr_rht_params); 1267 if (ret) { 1268 pr_err("ipmr: rhtable insert error %d\n", ret); 1269 ipmr_cache_free(c); 1270 return ret; 1271 } 1272 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1273 /* Check to see if we resolved a queued list. If so we 1274 * need to send on the frames and tidy up. 
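	 * At most one pending entry can match a given (S,G), so we stop at
	 * the first hit.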
1275 */ 1276 found = false; 1277 spin_lock_bh(&mfc_unres_lock); 1278 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1279 uc = (struct mfc_cache *)_uc; 1280 if (uc->mfc_origin == c->mfc_origin && 1281 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1282 list_del(&_uc->list); 1283 atomic_dec(&mrt->cache_resolve_queue_len); 1284 found = true; 1285 break; 1286 } 1287 } 1288 if (list_empty(&mrt->mfc_unres_queue)) 1289 del_timer(&mrt->ipmr_expire_timer); 1290 spin_unlock_bh(&mfc_unres_lock); 1291 1292 if (found) { 1293 ipmr_cache_resolve(net, mrt, uc, c); 1294 ipmr_cache_free(uc); 1295 } 1296 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); 1297 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1298 return 0; 1299 } 1300 1301 /* Close the multicast socket, and clear the vif tables etc */ 1302 static void mroute_clean_tables(struct mr_table *mrt, bool all) 1303 { 1304 struct net *net = read_pnet(&mrt->net); 1305 struct mr_mfc *c, *tmp; 1306 struct mfc_cache *cache; 1307 LIST_HEAD(list); 1308 int i; 1309 1310 /* Shut down all active vif entries */ 1311 for (i = 0; i < mrt->maxvif; i++) { 1312 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) 1313 continue; 1314 vif_delete(mrt, i, 0, &list); 1315 } 1316 unregister_netdevice_many(&list); 1317 1318 /* Wipe the cache */ 1319 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1320 if (!all && (c->mfc_flags & MFC_STATIC)) 1321 continue; 1322 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1323 list_del_rcu(&c->list); 1324 cache = (struct mfc_cache *)c; 1325 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, 1326 mrt->id); 1327 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1328 mr_cache_put(c); 1329 } 1330 1331 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1332 spin_lock_bh(&mfc_unres_lock); 1333 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1334 list_del(&c->list); 1335 cache = (struct mfc_cache *)c; 1336 mroute_netlink_event(mrt, cache, RTM_DELROUTE); 1337 ipmr_destroy_unres(mrt, cache); 1338 } 1339 spin_unlock_bh(&mfc_unres_lock); 1340 } 1341 } 1342 1343 /* called from ip_ra_control(), before an RCU grace period, 1344 * we dont need to call synchronize_rcu() here 1345 */ 1346 static void mrtsock_destruct(struct sock *sk) 1347 { 1348 struct net *net = sock_net(sk); 1349 struct mr_table *mrt; 1350 1351 rtnl_lock(); 1352 ipmr_for_each_table(mrt, net) { 1353 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1354 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1355 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1356 NETCONFA_MC_FORWARDING, 1357 NETCONFA_IFINDEX_ALL, 1358 net->ipv4.devconf_all); 1359 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1360 mroute_clean_tables(mrt, false); 1361 } 1362 } 1363 rtnl_unlock(); 1364 } 1365 1366 /* Socket options and virtual interface manipulation. The whole 1367 * virtual interface system is a complete heap, but unfortunately 1368 * that's how BSD mrouted happens to think. Maybe one day with a proper 1369 * MOSPF/PIM router set up we can clean this up. 
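 *
 * For orientation only, a rough sketch of the setsockopt() sequence an
 * mrouted-style daemon performs against this interface. It is an
 * illustration, not code taken from mrouted: error handling, the usual
 * userspace headers and CAP_NET_ADMIN are assumed, the addresses are made
 * up, vif 0 plays the upstream interface and vif 1 a downstream one.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_threshold = 1 };
 *	struct mfcctl mc = { .mfcc_parent = 0 };
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	vc.vifc_vifi = 0;
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	vc.vifc_vifi = 1;
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.129");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	mc.mfcc_origin.s_addr   = inet_addr("198.51.100.7");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");
 *	mc.mfcc_ttls[1] = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *	...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, &one, sizeof(one));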
1370 */ 1371 1372 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, 1373 unsigned int optlen) 1374 { 1375 struct net *net = sock_net(sk); 1376 int val, ret = 0, parent = 0; 1377 struct mr_table *mrt; 1378 struct vifctl vif; 1379 struct mfcctl mfc; 1380 bool do_wrvifwhole; 1381 u32 uval; 1382 1383 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1384 rtnl_lock(); 1385 if (sk->sk_type != SOCK_RAW || 1386 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1387 ret = -EOPNOTSUPP; 1388 goto out_unlock; 1389 } 1390 1391 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1392 if (!mrt) { 1393 ret = -ENOENT; 1394 goto out_unlock; 1395 } 1396 if (optname != MRT_INIT) { 1397 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1398 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1399 ret = -EACCES; 1400 goto out_unlock; 1401 } 1402 } 1403 1404 switch (optname) { 1405 case MRT_INIT: 1406 if (optlen != sizeof(int)) { 1407 ret = -EINVAL; 1408 break; 1409 } 1410 if (rtnl_dereference(mrt->mroute_sk)) { 1411 ret = -EADDRINUSE; 1412 break; 1413 } 1414 1415 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1416 if (ret == 0) { 1417 rcu_assign_pointer(mrt->mroute_sk, sk); 1418 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1419 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1420 NETCONFA_MC_FORWARDING, 1421 NETCONFA_IFINDEX_ALL, 1422 net->ipv4.devconf_all); 1423 } 1424 break; 1425 case MRT_DONE: 1426 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1427 ret = -EACCES; 1428 } else { 1429 /* We need to unlock here because mrtsock_destruct takes 1430 * care of rtnl itself and we can't change that due to 1431 * the IP_ROUTER_ALERT setsockopt which runs without it. 1432 */ 1433 rtnl_unlock(); 1434 ret = ip_ra_control(sk, 0, NULL); 1435 goto out; 1436 } 1437 break; 1438 case MRT_ADD_VIF: 1439 case MRT_DEL_VIF: 1440 if (optlen != sizeof(vif)) { 1441 ret = -EINVAL; 1442 break; 1443 } 1444 if (copy_from_user(&vif, optval, sizeof(vif))) { 1445 ret = -EFAULT; 1446 break; 1447 } 1448 if (vif.vifc_vifi >= MAXVIFS) { 1449 ret = -ENFILE; 1450 break; 1451 } 1452 if (optname == MRT_ADD_VIF) { 1453 ret = vif_add(net, mrt, &vif, 1454 sk == rtnl_dereference(mrt->mroute_sk)); 1455 } else { 1456 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1457 } 1458 break; 1459 /* Manipulate the forwarding caches. These live 1460 * in a sort of kernel/user symbiosis. 1461 */ 1462 case MRT_ADD_MFC: 1463 case MRT_DEL_MFC: 1464 parent = -1; 1465 /* fall through */ 1466 case MRT_ADD_MFC_PROXY: 1467 case MRT_DEL_MFC_PROXY: 1468 if (optlen != sizeof(mfc)) { 1469 ret = -EINVAL; 1470 break; 1471 } 1472 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1473 ret = -EFAULT; 1474 break; 1475 } 1476 if (parent == 0) 1477 parent = mfc.mfcc_parent; 1478 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1479 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1480 else 1481 ret = ipmr_mfc_add(net, mrt, &mfc, 1482 sk == rtnl_dereference(mrt->mroute_sk), 1483 parent); 1484 break; 1485 /* Control PIM assert. 
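	 * When asserts are enabled, packets arriving on the wrong interface
	 * generate IGMPMSG_WRONGVIF upcalls, rate limited by
	 * MFC_ASSERT_THRESH.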
*/ 1486 case MRT_ASSERT: 1487 if (optlen != sizeof(val)) { 1488 ret = -EINVAL; 1489 break; 1490 } 1491 if (get_user(val, (int __user *)optval)) { 1492 ret = -EFAULT; 1493 break; 1494 } 1495 mrt->mroute_do_assert = val; 1496 break; 1497 case MRT_PIM: 1498 if (!ipmr_pimsm_enabled()) { 1499 ret = -ENOPROTOOPT; 1500 break; 1501 } 1502 if (optlen != sizeof(val)) { 1503 ret = -EINVAL; 1504 break; 1505 } 1506 if (get_user(val, (int __user *)optval)) { 1507 ret = -EFAULT; 1508 break; 1509 } 1510 1511 do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); 1512 val = !!val; 1513 if (val != mrt->mroute_do_pim) { 1514 mrt->mroute_do_pim = val; 1515 mrt->mroute_do_assert = val; 1516 mrt->mroute_do_wrvifwhole = do_wrvifwhole; 1517 } 1518 break; 1519 case MRT_TABLE: 1520 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1521 ret = -ENOPROTOOPT; 1522 break; 1523 } 1524 if (optlen != sizeof(uval)) { 1525 ret = -EINVAL; 1526 break; 1527 } 1528 if (get_user(uval, (u32 __user *)optval)) { 1529 ret = -EFAULT; 1530 break; 1531 } 1532 1533 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1534 ret = -EBUSY; 1535 } else { 1536 mrt = ipmr_new_table(net, uval); 1537 if (IS_ERR(mrt)) 1538 ret = PTR_ERR(mrt); 1539 else 1540 raw_sk(sk)->ipmr_table = uval; 1541 } 1542 break; 1543 /* Spurious command, or MRT_VERSION which you cannot set. */ 1544 default: 1545 ret = -ENOPROTOOPT; 1546 } 1547 out_unlock: 1548 rtnl_unlock(); 1549 out: 1550 return ret; 1551 } 1552 1553 /* Getsock opt support for the multicast routing system. */ 1554 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1555 { 1556 int olr; 1557 int val; 1558 struct net *net = sock_net(sk); 1559 struct mr_table *mrt; 1560 1561 if (sk->sk_type != SOCK_RAW || 1562 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1563 return -EOPNOTSUPP; 1564 1565 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1566 if (!mrt) 1567 return -ENOENT; 1568 1569 switch (optname) { 1570 case MRT_VERSION: 1571 val = 0x0305; 1572 break; 1573 case MRT_PIM: 1574 if (!ipmr_pimsm_enabled()) 1575 return -ENOPROTOOPT; 1576 val = mrt->mroute_do_pim; 1577 break; 1578 case MRT_ASSERT: 1579 val = mrt->mroute_do_assert; 1580 break; 1581 default: 1582 return -ENOPROTOOPT; 1583 } 1584 1585 if (get_user(olr, optlen)) 1586 return -EFAULT; 1587 olr = min_t(unsigned int, olr, sizeof(int)); 1588 if (olr < 0) 1589 return -EINVAL; 1590 if (put_user(olr, optlen)) 1591 return -EFAULT; 1592 if (copy_to_user(optval, &val, olr)) 1593 return -EFAULT; 1594 return 0; 1595 } 1596 1597 /* The IP multicast ioctl support routines. */ 1598 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1599 { 1600 struct sioc_sg_req sr; 1601 struct sioc_vif_req vr; 1602 struct vif_device *vif; 1603 struct mfc_cache *c; 1604 struct net *net = sock_net(sk); 1605 struct mr_table *mrt; 1606 1607 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1608 if (!mrt) 1609 return -ENOENT; 1610 1611 switch (cmd) { 1612 case SIOCGETVIFCNT: 1613 if (copy_from_user(&vr, arg, sizeof(vr))) 1614 return -EFAULT; 1615 if (vr.vifi >= mrt->maxvif) 1616 return -EINVAL; 1617 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1618 read_lock(&mrt_lock); 1619 vif = &mrt->vif_table[vr.vifi]; 1620 if (VIF_EXISTS(mrt, vr.vifi)) { 1621 vr.icount = vif->pkt_in; 1622 vr.ocount = vif->pkt_out; 1623 vr.ibytes = vif->bytes_in; 1624 vr.obytes = vif->bytes_out; 1625 read_unlock(&mrt_lock); 1626 1627 if (copy_to_user(arg, &vr, sizeof(vr))) 1628 return -EFAULT; 1629 return 0; 1630 } 1631 read_unlock(&mrt_lock); 1632 return -EADDRNOTAVAIL; 1633 case SIOCGETSGCNT: 1634 if (copy_from_user(&sr, arg, sizeof(sr))) 1635 return -EFAULT; 1636 1637 rcu_read_lock(); 1638 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1639 if (c) { 1640 sr.pktcnt = c->_c.mfc_un.res.pkt; 1641 sr.bytecnt = c->_c.mfc_un.res.bytes; 1642 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1643 rcu_read_unlock(); 1644 1645 if (copy_to_user(arg, &sr, sizeof(sr))) 1646 return -EFAULT; 1647 return 0; 1648 } 1649 rcu_read_unlock(); 1650 return -EADDRNOTAVAIL; 1651 default: 1652 return -ENOIOCTLCMD; 1653 } 1654 } 1655 1656 #ifdef CONFIG_COMPAT 1657 struct compat_sioc_sg_req { 1658 struct in_addr src; 1659 struct in_addr grp; 1660 compat_ulong_t pktcnt; 1661 compat_ulong_t bytecnt; 1662 compat_ulong_t wrong_if; 1663 }; 1664 1665 struct compat_sioc_vif_req { 1666 vifi_t vifi; /* Which iface */ 1667 compat_ulong_t icount; 1668 compat_ulong_t ocount; 1669 compat_ulong_t ibytes; 1670 compat_ulong_t obytes; 1671 }; 1672 1673 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1674 { 1675 struct compat_sioc_sg_req sr; 1676 struct compat_sioc_vif_req vr; 1677 struct vif_device *vif; 1678 struct mfc_cache *c; 1679 struct net *net = sock_net(sk); 1680 struct mr_table *mrt; 1681 1682 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1683 if (!mrt) 1684 return -ENOENT; 1685 1686 switch (cmd) { 1687 case SIOCGETVIFCNT: 1688 if (copy_from_user(&vr, arg, sizeof(vr))) 1689 return -EFAULT; 1690 if (vr.vifi >= mrt->maxvif) 1691 return -EINVAL; 1692 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); 1693 read_lock(&mrt_lock); 1694 vif = &mrt->vif_table[vr.vifi]; 1695 if (VIF_EXISTS(mrt, vr.vifi)) { 1696 vr.icount = vif->pkt_in; 1697 vr.ocount = vif->pkt_out; 1698 vr.ibytes = vif->bytes_in; 1699 vr.obytes = vif->bytes_out; 1700 read_unlock(&mrt_lock); 1701 1702 if (copy_to_user(arg, &vr, sizeof(vr))) 1703 return -EFAULT; 1704 return 0; 1705 } 1706 read_unlock(&mrt_lock); 1707 return -EADDRNOTAVAIL; 1708 case SIOCGETSGCNT: 1709 if (copy_from_user(&sr, arg, sizeof(sr))) 1710 return -EFAULT; 1711 1712 rcu_read_lock(); 1713 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1714 if (c) { 1715 sr.pktcnt = c->_c.mfc_un.res.pkt; 1716 sr.bytecnt = c->_c.mfc_un.res.bytes; 1717 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1718 rcu_read_unlock(); 1719 1720 if (copy_to_user(arg, &sr, sizeof(sr))) 1721 return -EFAULT; 1722 return 0; 1723 } 1724 rcu_read_unlock(); 1725 return -EADDRNOTAVAIL; 1726 default: 1727 return -ENOIOCTLCMD; 1728 } 1729 } 1730 #endif 1731 1732 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1733 { 1734 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1735 struct net *net = dev_net(dev); 1736 struct mr_table *mrt; 1737 struct vif_device *v; 1738 int ct; 1739 1740 if (event != NETDEV_UNREGISTER) 1741 return NOTIFY_DONE; 1742 1743 ipmr_for_each_table(mrt, net) { 1744 v = &mrt->vif_table[0]; 1745 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1746 if (v->dev == dev) 1747 vif_delete(mrt, ct, 1, NULL); 1748 } 1749 } 1750 return NOTIFY_DONE; 1751 } 1752 1753 static struct notifier_block ip_mr_notifier = { 1754 .notifier_call = ipmr_device_event, 1755 }; 1756 1757 /* Encapsulate a packet by attaching a valid IPIP header to it. 1758 * This avoids tunnel drivers and other mess and gives us the speed so 1759 * important for multicast video. 
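 * The outer header copies tos and ttl from the inner packet and carries
 * IPPROTO_IPIP; ip_select_ident() and ip_send_check() finish it off.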
1760 */ 1761 static void ip_encap(struct net *net, struct sk_buff *skb, 1762 __be32 saddr, __be32 daddr) 1763 { 1764 struct iphdr *iph; 1765 const struct iphdr *old_iph = ip_hdr(skb); 1766 1767 skb_push(skb, sizeof(struct iphdr)); 1768 skb->transport_header = skb->network_header; 1769 skb_reset_network_header(skb); 1770 iph = ip_hdr(skb); 1771 1772 iph->version = 4; 1773 iph->tos = old_iph->tos; 1774 iph->ttl = old_iph->ttl; 1775 iph->frag_off = 0; 1776 iph->daddr = daddr; 1777 iph->saddr = saddr; 1778 iph->protocol = IPPROTO_IPIP; 1779 iph->ihl = 5; 1780 iph->tot_len = htons(skb->len); 1781 ip_select_ident(net, skb, NULL); 1782 ip_send_check(iph); 1783 1784 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1785 nf_reset(skb); 1786 } 1787 1788 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1789 struct sk_buff *skb) 1790 { 1791 struct ip_options *opt = &(IPCB(skb)->opt); 1792 1793 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1794 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1795 1796 if (unlikely(opt->optlen)) 1797 ip_forward_options(skb); 1798 1799 return dst_output(net, sk, skb); 1800 } 1801 1802 #ifdef CONFIG_NET_SWITCHDEV 1803 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1804 int in_vifi, int out_vifi) 1805 { 1806 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1807 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1808 1809 if (!skb->offload_l3_fwd_mark) 1810 return false; 1811 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1812 return false; 1813 return netdev_phys_item_id_same(&out_vif->dev_parent_id, 1814 &in_vif->dev_parent_id); 1815 } 1816 #else 1817 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1818 int in_vifi, int out_vifi) 1819 { 1820 return false; 1821 } 1822 #endif 1823 1824 /* Processing handlers for ipmr_forward */ 1825 1826 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1827 int in_vifi, struct sk_buff *skb, int vifi) 1828 { 1829 const struct iphdr *iph = ip_hdr(skb); 1830 struct vif_device *vif = &mrt->vif_table[vifi]; 1831 struct net_device *dev; 1832 struct rtable *rt; 1833 struct flowi4 fl4; 1834 int encap = 0; 1835 1836 if (!vif->dev) 1837 goto out_free; 1838 1839 if (vif->flags & VIFF_REGISTER) { 1840 vif->pkt_out++; 1841 vif->bytes_out += skb->len; 1842 vif->dev->stats.tx_bytes += skb->len; 1843 vif->dev->stats.tx_packets++; 1844 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1845 goto out_free; 1846 } 1847 1848 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1849 goto out_free; 1850 1851 if (vif->flags & VIFF_TUNNEL) { 1852 rt = ip_route_output_ports(net, &fl4, NULL, 1853 vif->remote, vif->local, 1854 0, 0, 1855 IPPROTO_IPIP, 1856 RT_TOS(iph->tos), vif->link); 1857 if (IS_ERR(rt)) 1858 goto out_free; 1859 encap = sizeof(struct iphdr); 1860 } else { 1861 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1862 0, 0, 1863 IPPROTO_IPIP, 1864 RT_TOS(iph->tos), vif->link); 1865 if (IS_ERR(rt)) 1866 goto out_free; 1867 } 1868 1869 dev = rt->dst.dev; 1870 1871 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1872 /* Do not fragment multicasts. Alas, IPv4 does not 1873 * allow to send ICMP, so that packets will disappear 1874 * to blackhole. 
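		 * Such packets are only counted in IPSTATS_MIB_FRAGFAILS and
		 * dropped.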
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program is joined on.
	 * If we do not do this, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *c, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	int psend = -1;
	int vif, ct;

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * sent through the wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
1985 * It is bad, but otherwise we would need to move pretty 1986 * large chunk of pimd to kernel. Ough... --ANK 1987 */ 1988 (mrt->mroute_do_pim || 1989 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 1990 time_after(jiffies, 1991 c->_c.mfc_un.res.last_assert + 1992 MFC_ASSERT_THRESH)) { 1993 c->_c.mfc_un.res.last_assert = jiffies; 1994 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 1995 if (mrt->mroute_do_wrvifwhole) 1996 ipmr_cache_report(mrt, skb, true_vifi, 1997 IGMPMSG_WRVIFWHOLE); 1998 } 1999 goto dont_forward; 2000 } 2001 2002 forward: 2003 mrt->vif_table[vif].pkt_in++; 2004 mrt->vif_table[vif].bytes_in += skb->len; 2005 2006 /* Forward the frame */ 2007 if (c->mfc_origin == htonl(INADDR_ANY) && 2008 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 2009 if (true_vifi >= 0 && 2010 true_vifi != c->_c.mfc_parent && 2011 ip_hdr(skb)->ttl > 2012 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2013 /* It's an (*,*) entry and the packet is not coming from 2014 * the upstream: forward the packet to the upstream 2015 * only. 2016 */ 2017 psend = c->_c.mfc_parent; 2018 goto last_forward; 2019 } 2020 goto dont_forward; 2021 } 2022 for (ct = c->_c.mfc_un.res.maxvif - 1; 2023 ct >= c->_c.mfc_un.res.minvif; ct--) { 2024 /* For (*,G) entry, don't forward to the incoming interface */ 2025 if ((c->mfc_origin != htonl(INADDR_ANY) || 2026 ct != true_vifi) && 2027 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2028 if (psend != -1) { 2029 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2030 2031 if (skb2) 2032 ipmr_queue_xmit(net, mrt, true_vifi, 2033 skb2, psend); 2034 } 2035 psend = ct; 2036 } 2037 } 2038 last_forward: 2039 if (psend != -1) { 2040 if (local) { 2041 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2042 2043 if (skb2) 2044 ipmr_queue_xmit(net, mrt, true_vifi, skb2, 2045 psend); 2046 } else { 2047 ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); 2048 return; 2049 } 2050 } 2051 2052 dont_forward: 2053 if (!local) 2054 kfree_skb(skb); 2055 } 2056 2057 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2058 { 2059 struct rtable *rt = skb_rtable(skb); 2060 struct iphdr *iph = ip_hdr(skb); 2061 struct flowi4 fl4 = { 2062 .daddr = iph->daddr, 2063 .saddr = iph->saddr, 2064 .flowi4_tos = RT_TOS(iph->tos), 2065 .flowi4_oif = (rt_is_output_route(rt) ? 2066 skb->dev->ifindex : 0), 2067 .flowi4_iif = (rt_is_output_route(rt) ? 2068 LOOPBACK_IFINDEX : 2069 skb->dev->ifindex), 2070 .flowi4_mark = skb->mark, 2071 }; 2072 struct mr_table *mrt; 2073 int err; 2074 2075 err = ipmr_fib_lookup(net, &fl4, &mrt); 2076 if (err) 2077 return ERR_PTR(err); 2078 return mrt; 2079 } 2080 2081 /* Multicast packets for forwarding arrive here 2082 * Called with rcu_read_lock(); 2083 */ 2084 int ip_mr_input(struct sk_buff *skb) 2085 { 2086 struct mfc_cache *cache; 2087 struct net *net = dev_net(skb->dev); 2088 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2089 struct mr_table *mrt; 2090 struct net_device *dev; 2091 2092 /* skb->dev passed in is the loX master dev for vrfs. 2093 * As there are no vifs associated with loopback devices, 2094 * get the proper interface that does have a vif associated with it. 2095 */ 2096 dev = skb->dev; 2097 if (netif_is_l3_master(skb->dev)) { 2098 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2099 if (!dev) { 2100 kfree_skb(skb); 2101 return -ENODEV; 2102 } 2103 } 2104 2105 /* Packet is looped back after forward, it should not be 2106 * forwarded second time, but still can be delivered locally. 
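	 * (such skbs carry IPSKB_FORWARDED, set by ipmr_queue_xmit())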
2107 */ 2108 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2109 goto dont_forward; 2110 2111 mrt = ipmr_rt_fib_lookup(net, skb); 2112 if (IS_ERR(mrt)) { 2113 kfree_skb(skb); 2114 return PTR_ERR(mrt); 2115 } 2116 if (!local) { 2117 if (IPCB(skb)->opt.router_alert) { 2118 if (ip_call_ra_chain(skb)) 2119 return 0; 2120 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2121 /* IGMPv1 (and broken IGMPv2 implementations sort of 2122 * Cisco IOS <= 11.2(8)) do not put router alert 2123 * option to IGMP packets destined to routable 2124 * groups. It is very bad, because it means 2125 * that we can forward NO IGMP messages. 2126 */ 2127 struct sock *mroute_sk; 2128 2129 mroute_sk = rcu_dereference(mrt->mroute_sk); 2130 if (mroute_sk) { 2131 nf_reset(skb); 2132 raw_rcv(mroute_sk, skb); 2133 return 0; 2134 } 2135 } 2136 } 2137 2138 /* already under rcu_read_lock() */ 2139 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2140 if (!cache) { 2141 int vif = ipmr_find_vif(mrt, dev); 2142 2143 if (vif >= 0) 2144 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2145 vif); 2146 } 2147 2148 /* No usable cache entry */ 2149 if (!cache) { 2150 int vif; 2151 2152 if (local) { 2153 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2154 ip_local_deliver(skb); 2155 if (!skb2) 2156 return -ENOBUFS; 2157 skb = skb2; 2158 } 2159 2160 read_lock(&mrt_lock); 2161 vif = ipmr_find_vif(mrt, dev); 2162 if (vif >= 0) { 2163 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2164 read_unlock(&mrt_lock); 2165 2166 return err2; 2167 } 2168 read_unlock(&mrt_lock); 2169 kfree_skb(skb); 2170 return -ENODEV; 2171 } 2172 2173 read_lock(&mrt_lock); 2174 ip_mr_forward(net, mrt, dev, skb, cache, local); 2175 read_unlock(&mrt_lock); 2176 2177 if (local) 2178 return ip_local_deliver(skb); 2179 2180 return 0; 2181 2182 dont_forward: 2183 if (local) 2184 return ip_local_deliver(skb); 2185 kfree_skb(skb); 2186 return 0; 2187 } 2188 2189 #ifdef CONFIG_IP_PIMSM_V1 2190 /* Handle IGMP messages of PIMv1 */ 2191 int pim_rcv_v1(struct sk_buff *skb) 2192 { 2193 struct igmphdr *pim; 2194 struct net *net = dev_net(skb->dev); 2195 struct mr_table *mrt; 2196 2197 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2198 goto drop; 2199 2200 pim = igmp_hdr(skb); 2201 2202 mrt = ipmr_rt_fib_lookup(net, skb); 2203 if (IS_ERR(mrt)) 2204 goto drop; 2205 if (!mrt->mroute_do_pim || 2206 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2207 goto drop; 2208 2209 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2210 drop: 2211 kfree_skb(skb); 2212 } 2213 return 0; 2214 } 2215 #endif 2216 2217 #ifdef CONFIG_IP_PIMSM_V2 2218 static int pim_rcv(struct sk_buff *skb) 2219 { 2220 struct pimreghdr *pim; 2221 struct net *net = dev_net(skb->dev); 2222 struct mr_table *mrt; 2223 2224 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2225 goto drop; 2226 2227 pim = (struct pimreghdr *)skb_transport_header(skb); 2228 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2229 (pim->flags & PIM_NULL_REGISTER) || 2230 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2231 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2232 goto drop; 2233 2234 mrt = ipmr_rt_fib_lookup(net, skb); 2235 if (IS_ERR(mrt)) 2236 goto drop; 2237 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2238 drop: 2239 kfree_skb(skb); 2240 } 2241 return 0; 2242 } 2243 #endif 2244 2245 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2246 __be32 saddr, __be32 daddr, 2247 struct rtmsg *rtm, u32 portid) 2248 { 2249 struct mfc_cache *cache; 2250 struct 
mr_table *mrt; 2251 int err; 2252 2253 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2254 if (!mrt) 2255 return -ENOENT; 2256 2257 rcu_read_lock(); 2258 cache = ipmr_cache_find(mrt, saddr, daddr); 2259 if (!cache && skb->dev) { 2260 int vif = ipmr_find_vif(mrt, skb->dev); 2261 2262 if (vif >= 0) 2263 cache = ipmr_cache_find_any(mrt, daddr, vif); 2264 } 2265 if (!cache) { 2266 struct sk_buff *skb2; 2267 struct iphdr *iph; 2268 struct net_device *dev; 2269 int vif = -1; 2270 2271 dev = skb->dev; 2272 read_lock(&mrt_lock); 2273 if (dev) 2274 vif = ipmr_find_vif(mrt, dev); 2275 if (vif < 0) { 2276 read_unlock(&mrt_lock); 2277 rcu_read_unlock(); 2278 return -ENODEV; 2279 } 2280 skb2 = skb_clone(skb, GFP_ATOMIC); 2281 if (!skb2) { 2282 read_unlock(&mrt_lock); 2283 rcu_read_unlock(); 2284 return -ENOMEM; 2285 } 2286 2287 NETLINK_CB(skb2).portid = portid; 2288 skb_push(skb2, sizeof(struct iphdr)); 2289 skb_reset_network_header(skb2); 2290 iph = ip_hdr(skb2); 2291 iph->ihl = sizeof(struct iphdr) >> 2; 2292 iph->saddr = saddr; 2293 iph->daddr = daddr; 2294 iph->version = 0; 2295 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2296 read_unlock(&mrt_lock); 2297 rcu_read_unlock(); 2298 return err; 2299 } 2300 2301 read_lock(&mrt_lock); 2302 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2303 read_unlock(&mrt_lock); 2304 rcu_read_unlock(); 2305 return err; 2306 } 2307 2308 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2309 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2310 int flags) 2311 { 2312 struct nlmsghdr *nlh; 2313 struct rtmsg *rtm; 2314 int err; 2315 2316 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2317 if (!nlh) 2318 return -EMSGSIZE; 2319 2320 rtm = nlmsg_data(nlh); 2321 rtm->rtm_family = RTNL_FAMILY_IPMR; 2322 rtm->rtm_dst_len = 32; 2323 rtm->rtm_src_len = 32; 2324 rtm->rtm_tos = 0; 2325 rtm->rtm_table = mrt->id; 2326 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2327 goto nla_put_failure; 2328 rtm->rtm_type = RTN_MULTICAST; 2329 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2330 if (c->_c.mfc_flags & MFC_STATIC) 2331 rtm->rtm_protocol = RTPROT_STATIC; 2332 else 2333 rtm->rtm_protocol = RTPROT_MROUTED; 2334 rtm->rtm_flags = 0; 2335 2336 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2337 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2338 goto nla_put_failure; 2339 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2340 /* do not break the dump if cache is unresolved */ 2341 if (err < 0 && err != -ENOENT) 2342 goto nla_put_failure; 2343 2344 nlmsg_end(skb, nlh); 2345 return 0; 2346 2347 nla_put_failure: 2348 nlmsg_cancel(skb, nlh); 2349 return -EMSGSIZE; 2350 } 2351 2352 static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2353 u32 portid, u32 seq, struct mr_mfc *c, int cmd, 2354 int flags) 2355 { 2356 return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, 2357 cmd, flags); 2358 } 2359 2360 static size_t mroute_msgsize(bool unresolved, int maxvif) 2361 { 2362 size_t len = 2363 NLMSG_ALIGN(sizeof(struct rtmsg)) 2364 + nla_total_size(4) /* RTA_TABLE */ 2365 + nla_total_size(4) /* RTA_SRC */ 2366 + nla_total_size(4) /* RTA_DST */ 2367 ; 2368 2369 if (!unresolved) 2370 len = len 2371 + nla_total_size(4) /* RTA_IIF */ 2372 + nla_total_size(0) /* RTA_MULTIPATH */ 2373 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2374 /* RTA_MFC_STATS */ 2375 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2376 ; 2377 2378 return len; 2379 } 2380 2381 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2382 int 
cmd) 2383 { 2384 struct net *net = read_pnet(&mrt->net); 2385 struct sk_buff *skb; 2386 int err = -ENOBUFS; 2387 2388 skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, 2389 mrt->maxvif), 2390 GFP_ATOMIC); 2391 if (!skb) 2392 goto errout; 2393 2394 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2395 if (err < 0) 2396 goto errout; 2397 2398 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2399 return; 2400 2401 errout: 2402 kfree_skb(skb); 2403 if (err < 0) 2404 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2405 } 2406 2407 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2408 { 2409 size_t len = 2410 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2411 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2412 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2413 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2414 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2415 /* IPMRA_CREPORT_PKT */ 2416 + nla_total_size(payloadlen) 2417 ; 2418 2419 return len; 2420 } 2421 2422 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2423 { 2424 struct net *net = read_pnet(&mrt->net); 2425 struct nlmsghdr *nlh; 2426 struct rtgenmsg *rtgenm; 2427 struct igmpmsg *msg; 2428 struct sk_buff *skb; 2429 struct nlattr *nla; 2430 int payloadlen; 2431 2432 payloadlen = pkt->len - sizeof(struct igmpmsg); 2433 msg = (struct igmpmsg *)skb_network_header(pkt); 2434 2435 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2436 if (!skb) 2437 goto errout; 2438 2439 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2440 sizeof(struct rtgenmsg), 0); 2441 if (!nlh) 2442 goto errout; 2443 rtgenm = nlmsg_data(nlh); 2444 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2445 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2446 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2447 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2448 msg->im_src.s_addr) || 2449 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2450 msg->im_dst.s_addr)) 2451 goto nla_put_failure; 2452 2453 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); 2454 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2455 nla_data(nla), payloadlen)) 2456 goto nla_put_failure; 2457 2458 nlmsg_end(skb, nlh); 2459 2460 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2461 return; 2462 2463 nla_put_failure: 2464 nlmsg_cancel(skb, nlh); 2465 errout: 2466 kfree_skb(skb); 2467 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2468 } 2469 2470 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2471 struct netlink_ext_ack *extack) 2472 { 2473 struct net *net = sock_net(in_skb->sk); 2474 struct nlattr *tb[RTA_MAX + 1]; 2475 struct sk_buff *skb = NULL; 2476 struct mfc_cache *cache; 2477 struct mr_table *mrt; 2478 struct rtmsg *rtm; 2479 __be32 src, grp; 2480 u32 tableid; 2481 int err; 2482 2483 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2484 rtm_ipv4_policy, extack); 2485 if (err < 0) 2486 goto errout; 2487 2488 rtm = nlmsg_data(nlh); 2489 2490 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2491 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2492 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2493 2494 mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); 2495 if (!mrt) { 2496 err = -ENOENT; 2497 goto errout_free; 2498 } 2499 2500 /* entries are added/deleted only under RTNL */ 2501 rcu_read_lock(); 2502 cache = ipmr_cache_find(mrt, src, grp); 2503 rcu_read_unlock(); 2504 if (!cache) { 2505 err = -ENOENT; 2506 goto errout_free; 2507 } 2508 2509 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2510 if (!skb) { 2511 err = -ENOBUFS; 2512 goto errout_free; 2513 } 2514 2515 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2516 nlh->nlmsg_seq, cache, 2517 RTM_NEWROUTE, 0); 2518 if (err < 0) 2519 goto errout_free; 2520 2521 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2522 2523 errout: 2524 return err; 2525 2526 errout_free: 2527 kfree_skb(skb); 2528 goto errout; 2529 } 2530 2531 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2532 { 2533 struct fib_dump_filter filter = {}; 2534 int err; 2535 2536 if (cb->strict_check) { 2537 err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, 2538 &filter, cb); 2539 if (err < 0) 2540 return err; 2541 } 2542 2543 if (filter.table_id) { 2544 struct mr_table *mrt; 2545 2546 mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); 2547 if (!mrt) { 2548 if (filter.dump_all_families) 2549 return skb->len; 2550 2551 NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); 2552 return -ENOENT; 2553 } 2554 err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, 2555 &mfc_unres_lock, &filter); 2556 return skb->len ? : err; 2557 } 2558 2559 return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, 2560 _ipmr_fill_mroute, &mfc_unres_lock, &filter); 2561 } 2562 2563 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2564 [RTA_SRC] = { .type = NLA_U32 }, 2565 [RTA_DST] = { .type = NLA_U32 }, 2566 [RTA_IIF] = { .type = NLA_U32 }, 2567 [RTA_TABLE] = { .type = NLA_U32 }, 2568 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2569 }; 2570 2571 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2572 { 2573 switch (rtm_protocol) { 2574 case RTPROT_STATIC: 2575 case RTPROT_MROUTED: 2576 return true; 2577 } 2578 return false; 2579 } 2580 2581 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2582 { 2583 struct rtnexthop *rtnh = nla_data(nla); 2584 int remaining = nla_len(nla), vifi = 0; 2585 2586 while (rtnh_ok(rtnh, remaining)) { 2587 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2588 if (++vifi == MAXVIFS) 2589 break; 2590 rtnh = rtnh_next(rtnh, &remaining); 2591 } 2592 2593 return remaining > 0 ? 
-EINVAL : vifi; 2594 } 2595 2596 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2597 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2598 struct mfcctl *mfcc, int *mrtsock, 2599 struct mr_table **mrtret, 2600 struct netlink_ext_ack *extack) 2601 { 2602 struct net_device *dev = NULL; 2603 u32 tblid = RT_TABLE_DEFAULT; 2604 struct mr_table *mrt; 2605 struct nlattr *attr; 2606 struct rtmsg *rtm; 2607 int ret, rem; 2608 2609 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, 2610 extack); 2611 if (ret < 0) 2612 goto out; 2613 rtm = nlmsg_data(nlh); 2614 2615 ret = -EINVAL; 2616 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2617 rtm->rtm_type != RTN_MULTICAST || 2618 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2619 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2620 goto out; 2621 2622 memset(mfcc, 0, sizeof(*mfcc)); 2623 mfcc->mfcc_parent = -1; 2624 ret = 0; 2625 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2626 switch (nla_type(attr)) { 2627 case RTA_SRC: 2628 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2629 break; 2630 case RTA_DST: 2631 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2632 break; 2633 case RTA_IIF: 2634 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2635 if (!dev) { 2636 ret = -ENODEV; 2637 goto out; 2638 } 2639 break; 2640 case RTA_MULTIPATH: 2641 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2642 ret = -EINVAL; 2643 goto out; 2644 } 2645 break; 2646 case RTA_PREFSRC: 2647 ret = 1; 2648 break; 2649 case RTA_TABLE: 2650 tblid = nla_get_u32(attr); 2651 break; 2652 } 2653 } 2654 mrt = ipmr_get_table(net, tblid); 2655 if (!mrt) { 2656 ret = -ENOENT; 2657 goto out; 2658 } 2659 *mrtret = mrt; 2660 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2661 if (dev) 2662 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2663 2664 out: 2665 return ret; 2666 } 2667 2668 /* takes care of both newroute and delroute */ 2669 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2670 struct netlink_ext_ack *extack) 2671 { 2672 struct net *net = sock_net(skb->sk); 2673 int ret, mrtsock, parent; 2674 struct mr_table *tbl; 2675 struct mfcctl mfcc; 2676 2677 mrtsock = 0; 2678 tbl = NULL; 2679 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2680 if (ret < 0) 2681 return ret; 2682 2683 parent = ret ? 
mfcc.mfcc_parent : -1; 2684 if (nlh->nlmsg_type == RTM_NEWROUTE) 2685 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2686 else 2687 return ipmr_mfc_delete(tbl, &mfcc, parent); 2688 } 2689 2690 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2691 { 2692 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2693 2694 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2695 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2696 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2697 mrt->mroute_reg_vif_num) || 2698 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2699 mrt->mroute_do_assert) || 2700 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) || 2701 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, 2702 mrt->mroute_do_wrvifwhole)) 2703 return false; 2704 2705 return true; 2706 } 2707 2708 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2709 { 2710 struct nlattr *vif_nest; 2711 struct vif_device *vif; 2712 2713 /* if the VIF doesn't exist just continue */ 2714 if (!VIF_EXISTS(mrt, vifid)) 2715 return true; 2716 2717 vif = &mrt->vif_table[vifid]; 2718 vif_nest = nla_nest_start(skb, IPMRA_VIF); 2719 if (!vif_nest) 2720 return false; 2721 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2722 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2723 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2724 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2725 IPMRA_VIFA_PAD) || 2726 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2727 IPMRA_VIFA_PAD) || 2728 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2729 IPMRA_VIFA_PAD) || 2730 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2731 IPMRA_VIFA_PAD) || 2732 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2733 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2734 nla_nest_cancel(skb, vif_nest); 2735 return false; 2736 } 2737 nla_nest_end(skb, vif_nest); 2738 2739 return true; 2740 } 2741 2742 static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, 2743 struct netlink_ext_ack *extack) 2744 { 2745 struct ifinfomsg *ifm; 2746 2747 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { 2748 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); 2749 return -EINVAL; 2750 } 2751 2752 if (nlmsg_attrlen(nlh, sizeof(*ifm))) { 2753 NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump"); 2754 return -EINVAL; 2755 } 2756 2757 ifm = nlmsg_data(nlh); 2758 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || 2759 ifm->ifi_change || ifm->ifi_index) { 2760 NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); 2761 return -EINVAL; 2762 } 2763 2764 return 0; 2765 } 2766 2767 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2768 { 2769 struct net *net = sock_net(skb->sk); 2770 struct nlmsghdr *nlh = NULL; 2771 unsigned int t = 0, s_t; 2772 unsigned int e = 0, s_e; 2773 struct mr_table *mrt; 2774 2775 if (cb->strict_check) { 2776 int err = ipmr_valid_dumplink(cb->nlh, cb->extack); 2777 2778 if (err < 0) 2779 return err; 2780 } 2781 2782 s_t = cb->args[0]; 2783 s_e = cb->args[1]; 2784 2785 ipmr_for_each_table(mrt, net) { 2786 struct nlattr *vifs, *af; 2787 struct ifinfomsg *hdr; 2788 u32 i; 2789 2790 if (t < s_t) 2791 goto skip_table; 2792 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2793 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2794 sizeof(*hdr), NLM_F_MULTI); 2795 if (!nlh) 2796 break; 2797 2798 hdr = nlmsg_data(nlh); 2799 
memset(hdr, 0, sizeof(*hdr)); 2800 hdr->ifi_family = RTNL_FAMILY_IPMR; 2801 2802 af = nla_nest_start(skb, IFLA_AF_SPEC); 2803 if (!af) { 2804 nlmsg_cancel(skb, nlh); 2805 goto out; 2806 } 2807 2808 if (!ipmr_fill_table(mrt, skb)) { 2809 nlmsg_cancel(skb, nlh); 2810 goto out; 2811 } 2812 2813 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); 2814 if (!vifs) { 2815 nla_nest_end(skb, af); 2816 nlmsg_end(skb, nlh); 2817 goto out; 2818 } 2819 for (i = 0; i < mrt->maxvif; i++) { 2820 if (e < s_e) 2821 goto skip_entry; 2822 if (!ipmr_fill_vif(mrt, i, skb)) { 2823 nla_nest_end(skb, vifs); 2824 nla_nest_end(skb, af); 2825 nlmsg_end(skb, nlh); 2826 goto out; 2827 } 2828 skip_entry: 2829 e++; 2830 } 2831 s_e = 0; 2832 e = 0; 2833 nla_nest_end(skb, vifs); 2834 nla_nest_end(skb, af); 2835 nlmsg_end(skb, nlh); 2836 skip_table: 2837 t++; 2838 } 2839 2840 out: 2841 cb->args[1] = e; 2842 cb->args[0] = t; 2843 2844 return skb->len; 2845 } 2846 2847 #ifdef CONFIG_PROC_FS 2848 /* The /proc interfaces to multicast routing : 2849 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2850 */ 2851 2852 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2853 __acquires(mrt_lock) 2854 { 2855 struct mr_vif_iter *iter = seq->private; 2856 struct net *net = seq_file_net(seq); 2857 struct mr_table *mrt; 2858 2859 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2860 if (!mrt) 2861 return ERR_PTR(-ENOENT); 2862 2863 iter->mrt = mrt; 2864 2865 read_lock(&mrt_lock); 2866 return mr_vif_seq_start(seq, pos); 2867 } 2868 2869 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 2870 __releases(mrt_lock) 2871 { 2872 read_unlock(&mrt_lock); 2873 } 2874 2875 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2876 { 2877 struct mr_vif_iter *iter = seq->private; 2878 struct mr_table *mrt = iter->mrt; 2879 2880 if (v == SEQ_START_TOKEN) { 2881 seq_puts(seq, 2882 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 2883 } else { 2884 const struct vif_device *vif = v; 2885 const char *name = vif->dev ? 
2886 vif->dev->name : "none"; 2887 2888 seq_printf(seq, 2889 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2890 vif - mrt->vif_table, 2891 name, vif->bytes_in, vif->pkt_in, 2892 vif->bytes_out, vif->pkt_out, 2893 vif->flags, vif->local, vif->remote); 2894 } 2895 return 0; 2896 } 2897 2898 static const struct seq_operations ipmr_vif_seq_ops = { 2899 .start = ipmr_vif_seq_start, 2900 .next = mr_vif_seq_next, 2901 .stop = ipmr_vif_seq_stop, 2902 .show = ipmr_vif_seq_show, 2903 }; 2904 2905 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 2906 { 2907 struct net *net = seq_file_net(seq); 2908 struct mr_table *mrt; 2909 2910 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2911 if (!mrt) 2912 return ERR_PTR(-ENOENT); 2913 2914 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 2915 } 2916 2917 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2918 { 2919 int n; 2920 2921 if (v == SEQ_START_TOKEN) { 2922 seq_puts(seq, 2923 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 2924 } else { 2925 const struct mfc_cache *mfc = v; 2926 const struct mr_mfc_iter *it = seq->private; 2927 const struct mr_table *mrt = it->mrt; 2928 2929 seq_printf(seq, "%08X %08X %-3hd", 2930 (__force u32) mfc->mfc_mcastgrp, 2931 (__force u32) mfc->mfc_origin, 2932 mfc->_c.mfc_parent); 2933 2934 if (it->cache != &mrt->mfc_unres_queue) { 2935 seq_printf(seq, " %8lu %8lu %8lu", 2936 mfc->_c.mfc_un.res.pkt, 2937 mfc->_c.mfc_un.res.bytes, 2938 mfc->_c.mfc_un.res.wrong_if); 2939 for (n = mfc->_c.mfc_un.res.minvif; 2940 n < mfc->_c.mfc_un.res.maxvif; n++) { 2941 if (VIF_EXISTS(mrt, n) && 2942 mfc->_c.mfc_un.res.ttls[n] < 255) 2943 seq_printf(seq, 2944 " %2d:%-3d", 2945 n, mfc->_c.mfc_un.res.ttls[n]); 2946 } 2947 } else { 2948 /* unresolved mfc_caches don't contain 2949 * pkt, bytes and wrong_if values 2950 */ 2951 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 2952 } 2953 seq_putc(seq, '\n'); 2954 } 2955 return 0; 2956 } 2957 2958 static const struct seq_operations ipmr_mfc_seq_ops = { 2959 .start = ipmr_mfc_seq_start, 2960 .next = mr_mfc_seq_next, 2961 .stop = mr_mfc_seq_stop, 2962 .show = ipmr_mfc_seq_show, 2963 }; 2964 #endif 2965 2966 #ifdef CONFIG_IP_PIMSM_V2 2967 static const struct net_protocol pim_protocol = { 2968 .handler = pim_rcv, 2969 .netns_ok = 1, 2970 }; 2971 #endif 2972 2973 static unsigned int ipmr_seq_read(struct net *net) 2974 { 2975 ASSERT_RTNL(); 2976 2977 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); 2978 } 2979 2980 static int ipmr_dump(struct net *net, struct notifier_block *nb) 2981 { 2982 return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, 2983 ipmr_mr_table_iter, &mrt_lock); 2984 } 2985 2986 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 2987 .family = RTNL_FAMILY_IPMR, 2988 .fib_seq_read = ipmr_seq_read, 2989 .fib_dump = ipmr_dump, 2990 .owner = THIS_MODULE, 2991 }; 2992 2993 static int __net_init ipmr_notifier_init(struct net *net) 2994 { 2995 struct fib_notifier_ops *ops; 2996 2997 net->ipv4.ipmr_seq = 0; 2998 2999 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3000 if (IS_ERR(ops)) 3001 return PTR_ERR(ops); 3002 net->ipv4.ipmr_notifier_ops = ops; 3003 3004 return 0; 3005 } 3006 3007 static void __net_exit ipmr_notifier_exit(struct net *net) 3008 { 3009 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3010 net->ipv4.ipmr_notifier_ops = NULL; 3011 } 3012 3013 /* Setup for IP multicast routing */ 3014 static int __net_init ipmr_net_init(struct net *net) 3015 { 3016 int err; 3017 3018 err = 
ipmr_notifier_init(net); 3019 if (err) 3020 goto ipmr_notifier_fail; 3021 3022 err = ipmr_rules_init(net); 3023 if (err < 0) 3024 goto ipmr_rules_fail; 3025 3026 #ifdef CONFIG_PROC_FS 3027 err = -ENOMEM; 3028 if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, 3029 sizeof(struct mr_vif_iter))) 3030 goto proc_vif_fail; 3031 if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 3032 sizeof(struct mr_mfc_iter))) 3033 goto proc_cache_fail; 3034 #endif 3035 return 0; 3036 3037 #ifdef CONFIG_PROC_FS 3038 proc_cache_fail: 3039 remove_proc_entry("ip_mr_vif", net->proc_net); 3040 proc_vif_fail: 3041 ipmr_rules_exit(net); 3042 #endif 3043 ipmr_rules_fail: 3044 ipmr_notifier_exit(net); 3045 ipmr_notifier_fail: 3046 return err; 3047 } 3048 3049 static void __net_exit ipmr_net_exit(struct net *net) 3050 { 3051 #ifdef CONFIG_PROC_FS 3052 remove_proc_entry("ip_mr_cache", net->proc_net); 3053 remove_proc_entry("ip_mr_vif", net->proc_net); 3054 #endif 3055 ipmr_notifier_exit(net); 3056 ipmr_rules_exit(net); 3057 } 3058 3059 static struct pernet_operations ipmr_net_ops = { 3060 .init = ipmr_net_init, 3061 .exit = ipmr_net_exit, 3062 }; 3063 3064 int __init ip_mr_init(void) 3065 { 3066 int err; 3067 3068 mrt_cachep = kmem_cache_create("ip_mrt_cache", 3069 sizeof(struct mfc_cache), 3070 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 3071 NULL); 3072 3073 err = register_pernet_subsys(&ipmr_net_ops); 3074 if (err) 3075 goto reg_pernet_fail; 3076 3077 err = register_netdevice_notifier(&ip_mr_notifier); 3078 if (err) 3079 goto reg_notif_fail; 3080 #ifdef CONFIG_IP_PIMSM_V2 3081 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3082 pr_err("%s: can't add PIM protocol\n", __func__); 3083 err = -EAGAIN; 3084 goto add_proto_fail; 3085 } 3086 #endif 3087 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 3088 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); 3089 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, 3090 ipmr_rtm_route, NULL, 0); 3091 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, 3092 ipmr_rtm_route, NULL, 0); 3093 3094 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, 3095 NULL, ipmr_rtm_dumplink, 0); 3096 return 0; 3097 3098 #ifdef CONFIG_IP_PIMSM_V2 3099 add_proto_fail: 3100 unregister_netdevice_notifier(&ip_mr_notifier); 3101 #endif 3102 reg_notif_fail: 3103 unregister_pernet_subsys(&ipmr_net_ops); 3104 reg_pernet_fail: 3105 kmem_cache_destroy(mrt_cachep); 3106 return err; 3107 } 3108
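
/* What follows are standalone userspace sketches added to illustrate
 * mechanisms used in the listing above; none of them is kernel code, and
 * every type or constant prefixed "ex_"/"EX_" is invented for the example.
 *
 * First, the duplication strategy of ip_mr_forward(): walk the outgoing
 * vifs from maxvif-1 down to minvif, send a copy on every vif whose
 * configured TTL threshold is exceeded by the packet TTL, and hand the
 * *original* buffer to the last eligible vif so one clone is saved.  The
 * struct pkt / xmit() names are placeholders, not kernel APIs.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define EX_MAXVIFS 8

struct pkt {
	unsigned char ttl;
	char payload[32];
};

static struct pkt *pkt_clone(const struct pkt *p)
{
	struct pkt *copy = malloc(sizeof(*copy));

	if (copy)
		memcpy(copy, p, sizeof(*copy));
	return copy;
}

static void xmit(struct pkt *p, int vif)
{
	printf("xmit ttl=%u on vif %d\n", p->ttl, vif);
	free(p);		/* the transmit path consumes the buffer */
}

/* ttls[vif] is the per-vif threshold (255 == "never forward here"),
 * playing the role of c->_c.mfc_un.res.ttls[] in the real code.
 */
static void forward(struct pkt *p, const unsigned char *ttls, int maxvif)
{
	int psend = -1, ct;

	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (p->ttl > ttls[ct]) {
			if (psend != -1) {
				struct pkt *copy = pkt_clone(p);

				if (copy)
					xmit(copy, psend);
			}
			psend = ct;	/* remember; original goes out last */
		}
	}
	if (psend != -1)
		xmit(p, psend);		/* original buffer, no extra clone */
	else
		free(p);		/* nothing eligible: drop */
}

int main(void)
{
	unsigned char ttls[EX_MAXVIFS] = { 1, 255, 5, 1, 255, 255, 2, 255 };
	struct pkt *p = pkt_clone(&(struct pkt){ .ttl = 4, .payload = "demo" });

	if (p)
		forward(p, ttls, EX_MAXVIFS);
	return 0;
}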
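
/* Sketch of the sanity checks pim_rcv() applies to a PIMv2 Register before
 * decapsulating it: the version/type octet must be 0x21 (PIM version 2,
 * type 1 = Register), the Null-Register bit must be clear, and the RFC 1071
 * checksum over the 8-byte PIM header must verify.  (The kernel also
 * accepts a checksum computed over the whole packet, for older peers.)
 * The struct and EX_* constants here are local to the example; the kernel
 * uses struct pimreghdr and the PIM_* macros.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct ex_pimreghdr {
	uint8_t  type;		/* high nibble: version, low nibble: type */
	uint8_t  reserved;
	uint16_t csum;		/* Internet checksum over this header */
	uint32_t flags;		/* top bit (network order) = Null-Register */
};

#define EX_PIM_VERSION		2
#define EX_PIM_TYPE_REGISTER	1
#define EX_PIM_NULL_REGISTER	htonl(0x40000000)

/* RFC 1071 checksum over an even number of bytes. */
static uint16_t inet_csum(const void *data, size_t len)
{
	const uint16_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += *p++;
		len -= 2;
	}
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

static int register_hdr_ok(const struct ex_pimreghdr *pim)
{
	if (pim->type != ((EX_PIM_VERSION << 4) | EX_PIM_TYPE_REGISTER))
		return 0;
	if (pim->flags & EX_PIM_NULL_REGISTER)
		return 0;
	/* Summing the header including its own csum field yields zero
	 * exactly when the stored checksum is valid.
	 */
	return inet_csum(pim, sizeof(*pim)) == 0;
}

int main(void)
{
	struct ex_pimreghdr pim = {
		.type = (EX_PIM_VERSION << 4) | EX_PIM_TYPE_REGISTER,
	};

	pim.csum = 0;					/* compute with csum cleared */
	pim.csum = inet_csum(&pim, sizeof(pim));

	printf("register header %s\n", register_hdr_ok(&pim) ? "ok" : "bad");
	return 0;
}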
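
/* ipmr_rtm_getroute() above answers RTM_GETROUTE requests whose rtmsg has
 * rtm_family == RTNL_FAMILY_IPMR, keyed by RTA_SRC/RTA_DST and optionally
 * RTA_TABLE.  The sketch below issues such a query over a plain AF_NETLINK
 * socket and only reports whether the kernel answered with a route or an
 * error (-ENOENT when no (S,G) cache entry exists).  The source and group
 * addresses are placeholders, and the reply handling is deliberately
 * minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void add_rta(struct nlmsghdr *nlh, unsigned short type,
		    const void *data, unsigned short len)
{
	struct rtattr *rta = (struct rtattr *)((char *)nlh +
					       NLMSG_ALIGN(nlh->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		char attrs[64];
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct in_addr src, grp;
	char buf[512];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	ssize_t n;

	if (fd < 0) {
		perror("netlink");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.nlh.nlmsg_seq = 1;
	req.rtm.rtm_family = RTNL_FAMILY_IPMR;

	inet_pton(AF_INET, "192.0.2.1", &src);	/* placeholder source */
	inet_pton(AF_INET, "239.1.1.1", &grp);	/* placeholder group  */
	add_rta(&req.nlh, RTA_SRC, &src, sizeof(src));
	add_rta(&req.nlh, RTA_DST, &grp, sizeof(grp));

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		perror("sendto");
		return 1;
	}

	n = recv(fd, buf, sizeof(buf), 0);
	if (n > 0) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (NLMSG_OK(nlh, (unsigned int)n)) {
			if (nlh->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *e = NLMSG_DATA(nlh);

				printf("kernel replied with error %d\n", e->error);
			} else if (nlh->nlmsg_type == RTM_NEWROUTE) {
				printf("got a multicast route entry\n");
			}
		}
	}
	close(fd);
	return 0;
}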
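
/* ipmr_nla_get_ttls() above interprets an RTA_MULTIPATH attribute as an
 * array of struct rtnexthop records, one per vif, with the TTL threshold
 * carried in rtnh_hops.  This sketch performs the same decoding in
 * userspace with the uapi RTNH_OK()/RTNH_NEXT() helpers; the buffer is
 * hand-built only so there is something to parse.
 */
#include <stdio.h>
#include <string.h>
#include <linux/rtnetlink.h>

#define EX_MAXVIFS 32

int main(void)
{
	unsigned char buf[3 * sizeof(struct rtnexthop)];
	unsigned char ttls[EX_MAXVIFS];
	struct rtnexthop *rtnh;
	int remaining = sizeof(buf);
	int vifi = 0, i;

	/* Build three back-to-back nexthop records: thresholds 1,
	 * 255 (disabled) and 5 for vifs 0..2.
	 */
	memset(buf, 0, sizeof(buf));
	for (i = 0; i < 3; i++) {
		rtnh = (struct rtnexthop *)(buf + i * sizeof(*rtnh));
		rtnh->rtnh_len = sizeof(*rtnh);
		rtnh->rtnh_ifindex = i;
		rtnh->rtnh_hops = (i == 1) ? 255 : 1 + 2 * i;
	}

	memset(ttls, 255, sizeof(ttls));
	rtnh = (struct rtnexthop *)buf;
	while (RTNH_OK(rtnh, remaining) && vifi < EX_MAXVIFS) {
		ttls[vifi++] = rtnh->rtnh_hops;
		remaining -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}

	for (i = 0; i < vifi; i++)
		printf("vif %d: ttl threshold %d\n", i, ttls[i]);
	return 0;
}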
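
/* rtm_to_ipmr_mfcc() above translates an RTM_NEWROUTE request into the same
 * struct mfcctl that the classic mrouted setsockopt interface uses.  The
 * sketch below drives that older interface directly: open the multicast
 * routing control socket, add one vif and one (S,G) entry, then tear it all
 * down.  Addresses and the interface index are placeholders; running this
 * for real needs CAP_NET_ADMIN and a kernel built with CONFIG_IP_MROUTE.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

int main(void)
{
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	struct vifctl vif;
	struct mfcctl mfc;
	int on = 1;

	if (s < 0 || setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on))) {
		perror("MRT_INIT");
		return 1;
	}

	/* vif 0 backed by ifindex 2 (placeholder), TTL threshold 1 */
	memset(&vif, 0, sizeof(vif));
	vif.vifc_vifi = 0;
	vif.vifc_flags = VIFF_USE_IFINDEX;
	vif.vifc_threshold = 1;
	vif.vifc_lcl_ifindex = 2;
	if (setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vif, sizeof(vif)))
		perror("MRT_ADD_VIF");

	/* (S,G) = (192.0.2.1, 239.1.1.1), arriving on vif 0, no oifs yet */
	memset(&mfc, 0, sizeof(mfc));
	inet_pton(AF_INET, "192.0.2.1", &mfc.mfcc_origin);
	inet_pton(AF_INET, "239.1.1.1", &mfc.mfcc_mcastgrp);
	mfc.mfcc_parent = 0;
	memset(mfc.mfcc_ttls, 255, sizeof(mfc.mfcc_ttls));
	if (setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mfc, sizeof(mfc)))
		perror("MRT_ADD_MFC");

	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
	close(s);
	return 0;
}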
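
/* ipmr_cache_report() above queues an IGMPMSG_* upcall on the mroute
 * control socket whenever the kernel needs help from the routing daemon
 * (no cache entry, wrong incoming vif, ...).  The daemon receives those
 * upcalls as raw IGMP datagrams whose payload starts with struct igmpmsg;
 * this sketch shows the read-and-dispatch loop used by mrouted/pimd-style
 * daemons.  It opens its own control socket like the MRT_ADD_MFC sketch
 * and does no real routing work.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

static void handle_upcalls(int mrouter_fd)
{
	char buf[2048], src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
	struct igmpmsg *msg = (struct igmpmsg *)buf;
	ssize_t n;

	for (;;) {
		n = recv(mrouter_fd, buf, sizeof(buf), 0);
		if (n < (ssize_t)sizeof(struct igmpmsg))
			break;

		/* For upcalls the IP protocol byte position is overlaid by
		 * im_mbz and is zero; ordinary IGMP packets have a non-zero
		 * protocol there, which is how daemons tell them apart.
		 */
		if (msg->im_mbz != 0)
			continue;

		inet_ntop(AF_INET, &msg->im_src, src, sizeof(src));
		inet_ntop(AF_INET, &msg->im_dst, dst, sizeof(dst));

		switch (msg->im_msgtype) {
		case IGMPMSG_NOCACHE:
			printf("no (S,G) entry for %s -> %s on vif %d\n",
			       src, dst, msg->im_vif);
			/* a real daemon would resolve and MRT_ADD_MFC here */
			break;
		case IGMPMSG_WRONGVIF:
			printf("wrong vif %d for %s -> %s (assert)\n",
			       msg->im_vif, src, dst);
			break;
		default:
			printf("upcall type %d ignored\n", msg->im_msgtype);
		}
	}
}

int main(void)
{
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int on = 1;

	if (s < 0 || setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on))) {
		perror("mroute socket");
		return 1;
	}
	handle_upcalls(s);
	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
	close(s);
	return 0;
}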
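
/* ipmr_rtm_dumplink() above resumes an interrupted dump from a two-level
 * cursor kept in cb->args[]: args[0] is the table position, args[1] the vif
 * position within that table, and the inner cursor is reset once a table
 * completes.  This sketch reproduces that resume logic over plain arrays,
 * stopping whenever a (hypothetical) per-call budget is exhausted and
 * picking up exactly where it left off on the next call.
 */
#include <stdio.h>

#define NTABLES 3
#define NVIFS   4

/* Pretend each emit may fail when the output buffer is full. */
static int emit(int table, int vif, int *budget)
{
	if (*budget == 0)
		return -1;
	(*budget)--;
	printf("  table %d vif %d\n", table, vif);
	return 0;
}

/* One "dump callback" invocation: emits at most 'budget' entries and
 * stores the resume point in args[0]/args[1], like cb->args[].
 */
static void dump_once(unsigned long args[2], int budget)
{
	unsigned long s_t = args[0], s_e = args[1];
	unsigned long t = 0, e = 0;
	int tbl;

	for (tbl = 0; tbl < NTABLES; tbl++, t++) {
		int vif;

		if (t < s_t)
			continue;
		for (vif = 0; vif < NVIFS; vif++) {
			if (e < s_e) {
				e++;
				continue;
			}
			if (emit(tbl, vif, &budget) < 0)
				goto out;
			e++;
		}
		s_e = 0;	/* next table restarts from vif 0 */
		e = 0;
	}
out:
	args[0] = t;
	args[1] = e;
}

int main(void)
{
	unsigned long args[2] = { 0, 0 };
	int round = 1;

	/* three entries fit per "message", like NLM_F_MULTI batches */
	while (args[0] < NTABLES) {
		printf("dump round %d (resume t=%lu e=%lu):\n",
		       round++, args[0], args[1]);
		dump_once(args, 3);
	}
	return 0;
}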
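
/* ipmr_vif_seq_show() above emits one row per vif in a fixed column layout
 * ("%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X") after a single header
 * line.  A small userspace reader of /proc/net/ip_mr_vif can therefore
 * recover the per-vif counters with fscanf(); this sketch just prints them
 * back out and does nothing useful with them.
 */
#include <stdio.h>

int main(void)
{
	char name[17];
	unsigned long bytes_in, pkts_in, bytes_out, pkts_out;
	unsigned int flags, local, remote;
	int idx;
	FILE *f = fopen("/proc/net/ip_mr_vif", "r");

	if (!f) {
		perror("ip_mr_vif");
		return 1;
	}
	/* skip the header line */
	if (fscanf(f, "%*[^\n]\n") < 0) {
		fclose(f);
		return 1;
	}
	while (fscanf(f, "%d %16s %lu %lu %lu %lu %X %X %X\n",
		      &idx, name, &bytes_in, &pkts_in, &bytes_out,
		      &pkts_out, &flags, &local, &remote) == 9)
		printf("vif %d (%s): %lu pkts in, %lu pkts out\n",
		       idx, name, pkts_in, pkts_out);
	fclose(f);
	return 0;
}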