// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>

#include <linux/nospec.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */
static struct kmem_cache *mrt_cachep __ro_after_init;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv4.mr_tables))

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv4.mr_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv4.mr_tables)
		return NULL;
	return ret;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb,
			       struct netlink_ext_ack *extack)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

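/* The IPMR fib rules carry no selectors of their own: ipmr_rule_match()
 * above accepts every packet and ipmr_rule_configure() parses nothing, so
 * only the rule action and table id decide which mr_table a lookup ends
 * up in.
 */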
static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	ipmr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		ipmr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv4.mrt;
	return NULL;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv4.mrt = mrt;
	return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	rtnl_lock();
	ipmr_free_table(net->ipv4.mrt);
	net->ipv4.mrt = NULL;
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct mfc_cache_cmp_arg *cmparg = arg->key;
	struct mfc_cache *c = (struct mfc_cache *)ptr;

	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
	       cmparg->mfc_origin != c->mfc_origin;
}

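/* rhashtable parameters for the resolved MFC cache: entries are keyed on
 * the {group, origin} pair (struct mfc_cache_cmp_arg), and ipmr_hash_cmp()
 * above follows the rhashtable obj_cmpfn convention of returning non-zero
 * when an entry does not match the lookup key.
 */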
static const struct rhashtable_params ipmr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc_cache, cmparg),
	.key_len = sizeof(struct mfc_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ipmr_hash_cmp,
	.automatic_shrinking = true,
};

static void ipmr_new_table_set(struct mr_table *mrt,
			       struct net *net)
{
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
}

static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
	.mfc_mcastgrp = htonl(INADDR_ANY),
	.mfc_origin = htonl(INADDR_ANY),
};

static struct mr_table_ops ipmr_mr_table_ops = {
	.rht_params = &ipmr_rht_params,
	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
};

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
		return ERR_PTR(-EINVAL);

	mrt = ipmr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
			      ipmr_expire_process, ipmr_new_table_set);
}

static void ipmr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return false;
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

	return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *tunnel_dev, *new_dev;
	struct ip_tunnel_parm p = { };
	int err;

	tunnel_dev = __dev_get_by_name(net, "tunl0");
	if (!tunnel_dev)
		goto out;

	p.iph.daddr = v->vifc_rmt_addr.s_addr;
	p.iph.saddr = v->vifc_lcl_addr.s_addr;
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	sprintf(p.name, "dvmrp%d", v->vifc_vifi);

	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
		goto out;
	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						     SIOCADDTUNNEL);
	if (err)
		goto out;

	new_dev = __dev_get_by_name(net, p.name);
	if (!new_dev)
		goto out;

	new_dev->flags |= IFF_MULTICAST;
	if (!ipmr_init_vif_indev(new_dev))
		goto out_unregister;
	if (dev_open(new_dev, NULL))
		goto out_unregister;
	dev_hold(new_dev);
	err = dev_set_allmulti(new_dev, 1);
	if (err) {
		dev_close(new_dev);
		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						       SIOCDELTUNNEL);
		dev_put(new_dev);
		new_dev = ERR_PTR(err);
	}
	return new_dev;

out_unregister:
	unregister_netdevice(new_dev);
out:
	return ERR_PTR(-ENOBUFS);
}

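/* PIM register interface (pimreg) support: packets transmitted through the
 * register VIF are not forwarded by the kernel itself; reg_vif_xmit() below
 * reports each one to the user-space daemon as an IGMPMSG_WHOLEPKT upcall
 * and then frees the skb.
 */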
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	return NULL;
}
#endif

static int call_ipmr_vif_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct vif_device *vif,
					 vifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv4.ipmr_seq);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct mfc_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}

/**
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
					      mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;

	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static void ipmr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}

static void ipmr_cache_free(struct mfc_cache *c)
{
	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = nlmsg_data(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			kfree_skb(skb);
		}
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
	struct mr_mfc *c, *next;
	unsigned long expires;
	unsigned long now;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

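/* vif_add - bring up a new virtual interface for the given table.
 * Runs under RTNL (it uses __in_dev_get_rtnl()); the final publication of
 * the new entry in vif_table is done under write-locked mrt_lock.
 */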
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	struct netdev_phys_item_id ppid = { };
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit,
			vifc->vifc_threshold,
			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
			(VIFF_TUNNEL | VIFF_REGISTER));

	err = dev_get_port_parent_id(dev, &ppid, true);
	if (err == 0) {
		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
		v->dev_parent_id.id_len = ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}

	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = htonl(INADDR_ANY)
	};

	if (mcastgrp == htonl(INADDR_ANY))
		return mr_mfc_find_any_parent(mrt, vifi);
	return mr_mfc_find_any(mrt, vifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
			.mfc_mcastgrp = mcastgrp,
			.mfc_origin = origin,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

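/* On the receive path (ip_mr_input() and ipmr_get_route()) an exact (S,G)
 * match via ipmr_cache_find() is tried first; only if that fails are the
 * (*,G) and (*,*) wildcard entries consulted through ipmr_cache_find_any().
 */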
/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->_c.mfc_un.res.minvif = MAXVIFS;
		c->_c.free = ipmr_cache_free_rcu;
		refcount_set(&c->_c.mfc_un.res.refcount, 1);
	}
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE)
			msg->im_vif = vifi;
		else
			msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		/* Add our header */
		igmp = skb_put(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

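/* An unresolved entry created below only buffers a small number of skbs
 * (see the qlen check in ipmr_cache_unresolved()) and, unless the daemon
 * installs a matching route first, is torn down by ipmr_expire_process()
 * roughly ten seconds after creation (see ipmr_cache_alloc_unres()).
 */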
/* Queue a packet for resolution. It gets locked cache entry! */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		c = ipmr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);

		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
	list_del_rcu(&c->_c.list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);

	return 0;
}

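/* Add or update an (S,G) / (*,G) forwarding entry on behalf of the daemon.
 * If a matching unresolved entry was queued, its pending packets are
 * replayed through ipmr_cache_resolve(). Entries are changed only under
 * RTNL.
 */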
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->_c.mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc_cache *)_uc;
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct net *net = read_pnet(&mrt->net);
	struct mr_mfc *c, *tmp;
	struct mfc_cache *cache;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
				continue;
			vif_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
			list_del_rcu(&c->list);
			cache = (struct mfc_cache *)c;
			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
						      mrt->id);
			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	if (flags & MRT_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				cache = (struct mfc_cache *)c;
				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
				ipmr_destroy_unres(mrt, cache);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
		}
	}
	rtnl_unlock();
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */
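/* A minimal sketch of the daemon-side sequence these options implement,
 * shown purely for illustration (userspace code, field values elided):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, ... };
 *	struct mfcctl mc = { .mfcc_parent = 0, ... };
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * ip_mroute_setsockopt() below rejects anything that is not a raw IGMP
 * socket, and only the socket that issued MRT_INIT (or a CAP_NET_ADMIN
 * caller) may issue the remaining options.
 */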
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	bool do_wrvifwhole;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			/* We need to unlock here because mrtsock_destruct takes
			 * care of rtnl itself and we can't change that due to
			 * the IP_ROUTER_ALERT setsockopt which runs without it.
			 */
			rtnl_unlock();
			ret = ip_ra_control(sk, 0, NULL);
			goto out;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		if (parent == 0)
			parent = mfc.mfcc_parent;
		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);
		break;
	case MRT_FLUSH:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(val, (int __user *)optval)) {
			ret = -EFAULT;
			break;
		}
		mroute_clean_tables(mrt, val);
		break;
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(val, (int __user *)optval)) {
			ret = -EFAULT;
			break;
		}
		mrt->mroute_do_assert = val;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(val, (int __user *)optval)) {
			ret = -EFAULT;
			break;
		}

		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
		val = !!val;
		if (val != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = val;
			mrt->mroute_do_assert = val;
			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(uval, (u32 __user *)optval)) {
			ret = -EFAULT;
			break;
		}

		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
out:
	return ret;
}

/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = mrt->mroute_do_pim;
		break;
	case MRT_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;
	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;
	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req {
	struct in_addr src;
	struct in_addr grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
	vifi_t	vifi;		/* Which iface */
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

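/* 32-bit compat counterpart of ipmr_ioctl(): the same SIOCGETVIFCNT and
 * SIOCGETSGCNT handling, but using the compat_ulong_t request layouts
 * defined above.
 */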
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req sr;
	struct compat_sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, NULL);
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	const struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version = 4;
	iph->tos = old_iph->tos;
	iph->ttl = old_iph->ttl;
	iph->frag_off = 0;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;
	iph->tot_len = htons(skb->len);
	ip_select_ident(net, skb, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset_ct(skb);
}

static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
				      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(net, sk, skb);
}

#ifdef CONFIG_NET_SWITCHDEV
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
	struct vif_device *in_vif = &mrt->vif_table[in_vifi];

	if (!skb->offload_l3_fwd_mark)
		return false;
	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
		return false;
	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
					&in_vif->dev_parent_id);
}
#else
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	return false;
}
#endif

/* Processing handlers for ipmr_forward */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    int in_vifi, struct sk_buff *skb, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	struct flowi4 fl4;
	int encap = 0;

	if (!vif->dev)
		goto out_free;

	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}

	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
		goto out_free;

	if (vif->flags & VIFF_TUNNEL) {
		rt = ip_route_output_ports(net, &fl4, NULL,
					   vif->remote, vif->local,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *c, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	int psend = -1;
	int vif, ct;

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * sent through wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
			if (mrt->mroute_do_wrvifwhole)
				ipmr_cache_report(mrt, skb, true_vifi,
						  IGMPMSG_WRVIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/* Forward the frame */
	if (c->mfc_origin == htonl(INADDR_ANY) &&
	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ip_hdr(skb)->ttl >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((c->mfc_origin != htonl(INADDR_ANY) ||
		     ct != true_vifi) &&
		    ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

				if (skb2)
					ipmr_queue_xmit(net, mrt, true_vifi,
							skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (skb2)
				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
						psend);
		} else {
			ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
			return;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
}

static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph = ip_hdr(skb);
	struct flowi4 fl4 = {
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowi4_tos = RT_TOS(iph->tos),
		.flowi4_oif = (rt_is_output_route(rt) ?
			       skb->dev->ifindex : 0),
		.flowi4_iif = (rt_is_output_route(rt) ?
			       LOOPBACK_IFINDEX :
			       skb->dev->ifindex),
		.flowi4_mark = skb->mark,
	};
	struct mr_table *mrt;
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err)
		return ERR_PTR(err);
	return mrt;
}

/* Multicast packets for forwarding arrive here
 * Called with rcu_read_lock();
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	struct net_device *dev;

	/* skb->dev passed in is the loX master dev for vrfs.
	 * As there are no vifs associated with loopback devices,
	 * get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	/* Packet is looped back after forward, it should not be
	 * forwarded second time, but still can be delivered locally.
2087 */ 2088 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2089 goto dont_forward; 2090 2091 mrt = ipmr_rt_fib_lookup(net, skb); 2092 if (IS_ERR(mrt)) { 2093 kfree_skb(skb); 2094 return PTR_ERR(mrt); 2095 } 2096 if (!local) { 2097 if (IPCB(skb)->opt.router_alert) { 2098 if (ip_call_ra_chain(skb)) 2099 return 0; 2100 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2101 /* IGMPv1 (and broken IGMPv2 implementations sort of 2102 * Cisco IOS <= 11.2(8)) do not put router alert 2103 * option to IGMP packets destined to routable 2104 * groups. It is very bad, because it means 2105 * that we can forward NO IGMP messages. 2106 */ 2107 struct sock *mroute_sk; 2108 2109 mroute_sk = rcu_dereference(mrt->mroute_sk); 2110 if (mroute_sk) { 2111 nf_reset_ct(skb); 2112 raw_rcv(mroute_sk, skb); 2113 return 0; 2114 } 2115 } 2116 } 2117 2118 /* already under rcu_read_lock() */ 2119 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2120 if (!cache) { 2121 int vif = ipmr_find_vif(mrt, dev); 2122 2123 if (vif >= 0) 2124 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2125 vif); 2126 } 2127 2128 /* No usable cache entry */ 2129 if (!cache) { 2130 int vif; 2131 2132 if (local) { 2133 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2134 ip_local_deliver(skb); 2135 if (!skb2) 2136 return -ENOBUFS; 2137 skb = skb2; 2138 } 2139 2140 read_lock(&mrt_lock); 2141 vif = ipmr_find_vif(mrt, dev); 2142 if (vif >= 0) { 2143 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2144 read_unlock(&mrt_lock); 2145 2146 return err2; 2147 } 2148 read_unlock(&mrt_lock); 2149 kfree_skb(skb); 2150 return -ENODEV; 2151 } 2152 2153 read_lock(&mrt_lock); 2154 ip_mr_forward(net, mrt, dev, skb, cache, local); 2155 read_unlock(&mrt_lock); 2156 2157 if (local) 2158 return ip_local_deliver(skb); 2159 2160 return 0; 2161 2162 dont_forward: 2163 if (local) 2164 return ip_local_deliver(skb); 2165 kfree_skb(skb); 2166 return 0; 2167 } 2168 2169 #ifdef CONFIG_IP_PIMSM_V1 2170 /* Handle IGMP messages of PIMv1 */ 2171 int pim_rcv_v1(struct sk_buff *skb) 2172 { 2173 struct igmphdr *pim; 2174 struct net *net = dev_net(skb->dev); 2175 struct mr_table *mrt; 2176 2177 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2178 goto drop; 2179 2180 pim = igmp_hdr(skb); 2181 2182 mrt = ipmr_rt_fib_lookup(net, skb); 2183 if (IS_ERR(mrt)) 2184 goto drop; 2185 if (!mrt->mroute_do_pim || 2186 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2187 goto drop; 2188 2189 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2190 drop: 2191 kfree_skb(skb); 2192 } 2193 return 0; 2194 } 2195 #endif 2196 2197 #ifdef CONFIG_IP_PIMSM_V2 2198 static int pim_rcv(struct sk_buff *skb) 2199 { 2200 struct pimreghdr *pim; 2201 struct net *net = dev_net(skb->dev); 2202 struct mr_table *mrt; 2203 2204 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2205 goto drop; 2206 2207 pim = (struct pimreghdr *)skb_transport_header(skb); 2208 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2209 (pim->flags & PIM_NULL_REGISTER) || 2210 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2211 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2212 goto drop; 2213 2214 mrt = ipmr_rt_fib_lookup(net, skb); 2215 if (IS_ERR(mrt)) 2216 goto drop; 2217 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2218 drop: 2219 kfree_skb(skb); 2220 } 2221 return 0; 2222 } 2223 #endif 2224 2225 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2226 __be32 saddr, __be32 daddr, 2227 struct rtmsg *rtm, u32 portid) 2228 { 2229 struct mfc_cache *cache; 2230 
struct mr_table *mrt; 2231 int err; 2232 2233 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2234 if (!mrt) 2235 return -ENOENT; 2236 2237 rcu_read_lock(); 2238 cache = ipmr_cache_find(mrt, saddr, daddr); 2239 if (!cache && skb->dev) { 2240 int vif = ipmr_find_vif(mrt, skb->dev); 2241 2242 if (vif >= 0) 2243 cache = ipmr_cache_find_any(mrt, daddr, vif); 2244 } 2245 if (!cache) { 2246 struct sk_buff *skb2; 2247 struct iphdr *iph; 2248 struct net_device *dev; 2249 int vif = -1; 2250 2251 dev = skb->dev; 2252 read_lock(&mrt_lock); 2253 if (dev) 2254 vif = ipmr_find_vif(mrt, dev); 2255 if (vif < 0) { 2256 read_unlock(&mrt_lock); 2257 rcu_read_unlock(); 2258 return -ENODEV; 2259 } 2260 2261 skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); 2262 if (!skb2) { 2263 read_unlock(&mrt_lock); 2264 rcu_read_unlock(); 2265 return -ENOMEM; 2266 } 2267 2268 NETLINK_CB(skb2).portid = portid; 2269 skb_push(skb2, sizeof(struct iphdr)); 2270 skb_reset_network_header(skb2); 2271 iph = ip_hdr(skb2); 2272 iph->ihl = sizeof(struct iphdr) >> 2; 2273 iph->saddr = saddr; 2274 iph->daddr = daddr; 2275 iph->version = 0; 2276 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2277 read_unlock(&mrt_lock); 2278 rcu_read_unlock(); 2279 return err; 2280 } 2281 2282 read_lock(&mrt_lock); 2283 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2284 read_unlock(&mrt_lock); 2285 rcu_read_unlock(); 2286 return err; 2287 } 2288 2289 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2290 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2291 int flags) 2292 { 2293 struct nlmsghdr *nlh; 2294 struct rtmsg *rtm; 2295 int err; 2296 2297 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2298 if (!nlh) 2299 return -EMSGSIZE; 2300 2301 rtm = nlmsg_data(nlh); 2302 rtm->rtm_family = RTNL_FAMILY_IPMR; 2303 rtm->rtm_dst_len = 32; 2304 rtm->rtm_src_len = 32; 2305 rtm->rtm_tos = 0; 2306 rtm->rtm_table = mrt->id; 2307 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2308 goto nla_put_failure; 2309 rtm->rtm_type = RTN_MULTICAST; 2310 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2311 if (c->_c.mfc_flags & MFC_STATIC) 2312 rtm->rtm_protocol = RTPROT_STATIC; 2313 else 2314 rtm->rtm_protocol = RTPROT_MROUTED; 2315 rtm->rtm_flags = 0; 2316 2317 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2318 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2319 goto nla_put_failure; 2320 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2321 /* do not break the dump if cache is unresolved */ 2322 if (err < 0 && err != -ENOENT) 2323 goto nla_put_failure; 2324 2325 nlmsg_end(skb, nlh); 2326 return 0; 2327 2328 nla_put_failure: 2329 nlmsg_cancel(skb, nlh); 2330 return -EMSGSIZE; 2331 } 2332 2333 static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2334 u32 portid, u32 seq, struct mr_mfc *c, int cmd, 2335 int flags) 2336 { 2337 return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, 2338 cmd, flags); 2339 } 2340 2341 static size_t mroute_msgsize(bool unresolved, int maxvif) 2342 { 2343 size_t len = 2344 NLMSG_ALIGN(sizeof(struct rtmsg)) 2345 + nla_total_size(4) /* RTA_TABLE */ 2346 + nla_total_size(4) /* RTA_SRC */ 2347 + nla_total_size(4) /* RTA_DST */ 2348 ; 2349 2350 if (!unresolved) 2351 len = len 2352 + nla_total_size(4) /* RTA_IIF */ 2353 + nla_total_size(0) /* RTA_MULTIPATH */ 2354 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2355 /* RTA_MFC_STATS */ 2356 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2357 ; 2358 2359 return len; 2360 } 2361 2362 static void mroute_netlink_event(struct mr_table *mrt, 
struct mfc_cache *mfc, 2363 int cmd) 2364 { 2365 struct net *net = read_pnet(&mrt->net); 2366 struct sk_buff *skb; 2367 int err = -ENOBUFS; 2368 2369 skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, 2370 mrt->maxvif), 2371 GFP_ATOMIC); 2372 if (!skb) 2373 goto errout; 2374 2375 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2376 if (err < 0) 2377 goto errout; 2378 2379 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2380 return; 2381 2382 errout: 2383 kfree_skb(skb); 2384 if (err < 0) 2385 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2386 } 2387 2388 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2389 { 2390 size_t len = 2391 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2392 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2393 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2394 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2395 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2396 /* IPMRA_CREPORT_PKT */ 2397 + nla_total_size(payloadlen) 2398 ; 2399 2400 return len; 2401 } 2402 2403 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2404 { 2405 struct net *net = read_pnet(&mrt->net); 2406 struct nlmsghdr *nlh; 2407 struct rtgenmsg *rtgenm; 2408 struct igmpmsg *msg; 2409 struct sk_buff *skb; 2410 struct nlattr *nla; 2411 int payloadlen; 2412 2413 payloadlen = pkt->len - sizeof(struct igmpmsg); 2414 msg = (struct igmpmsg *)skb_network_header(pkt); 2415 2416 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2417 if (!skb) 2418 goto errout; 2419 2420 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2421 sizeof(struct rtgenmsg), 0); 2422 if (!nlh) 2423 goto errout; 2424 rtgenm = nlmsg_data(nlh); 2425 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2426 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2427 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2428 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2429 msg->im_src.s_addr) || 2430 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2431 msg->im_dst.s_addr)) 2432 goto nla_put_failure; 2433 2434 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); 2435 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2436 nla_data(nla), payloadlen)) 2437 goto nla_put_failure; 2438 2439 nlmsg_end(skb, nlh); 2440 2441 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2442 return; 2443 2444 nla_put_failure: 2445 nlmsg_cancel(skb, nlh); 2446 errout: 2447 kfree_skb(skb); 2448 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2449 } 2450 2451 static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, 2452 const struct nlmsghdr *nlh, 2453 struct nlattr **tb, 2454 struct netlink_ext_ack *extack) 2455 { 2456 struct rtmsg *rtm; 2457 int i, err; 2458 2459 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 2460 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); 2461 return -EINVAL; 2462 } 2463 2464 if (!netlink_strict_get_check(skb)) 2465 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 2466 rtm_ipv4_policy, extack); 2467 2468 rtm = nlmsg_data(nlh); 2469 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || 2470 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 2471 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || 2472 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { 2473 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); 2474 return -EINVAL; 2475 } 2476 2477 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2478 rtm_ipv4_policy, extack); 2479 if 
(err) 2480 return err; 2481 2482 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2483 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2484 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); 2485 return -EINVAL; 2486 } 2487 2488 for (i = 0; i <= RTA_MAX; i++) { 2489 if (!tb[i]) 2490 continue; 2491 2492 switch (i) { 2493 case RTA_SRC: 2494 case RTA_DST: 2495 case RTA_TABLE: 2496 break; 2497 default: 2498 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); 2499 return -EINVAL; 2500 } 2501 } 2502 2503 return 0; 2504 } 2505 2506 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2507 struct netlink_ext_ack *extack) 2508 { 2509 struct net *net = sock_net(in_skb->sk); 2510 struct nlattr *tb[RTA_MAX + 1]; 2511 struct sk_buff *skb = NULL; 2512 struct mfc_cache *cache; 2513 struct mr_table *mrt; 2514 __be32 src, grp; 2515 u32 tableid; 2516 int err; 2517 2518 err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2519 if (err < 0) 2520 goto errout; 2521 2522 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2523 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2524 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2525 2526 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); 2527 if (!mrt) { 2528 err = -ENOENT; 2529 goto errout_free; 2530 } 2531 2532 /* entries are added/deleted only under RTNL */ 2533 rcu_read_lock(); 2534 cache = ipmr_cache_find(mrt, src, grp); 2535 rcu_read_unlock(); 2536 if (!cache) { 2537 err = -ENOENT; 2538 goto errout_free; 2539 } 2540 2541 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2542 if (!skb) { 2543 err = -ENOBUFS; 2544 goto errout_free; 2545 } 2546 2547 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2548 nlh->nlmsg_seq, cache, 2549 RTM_NEWROUTE, 0); 2550 if (err < 0) 2551 goto errout_free; 2552 2553 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2554 2555 errout: 2556 return err; 2557 2558 errout_free: 2559 kfree_skb(skb); 2560 goto errout; 2561 } 2562 2563 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2564 { 2565 struct fib_dump_filter filter = {}; 2566 int err; 2567 2568 if (cb->strict_check) { 2569 err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, 2570 &filter, cb); 2571 if (err < 0) 2572 return err; 2573 } 2574 2575 if (filter.table_id) { 2576 struct mr_table *mrt; 2577 2578 mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); 2579 if (!mrt) { 2580 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) 2581 return skb->len; 2582 2583 NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); 2584 return -ENOENT; 2585 } 2586 err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, 2587 &mfc_unres_lock, &filter); 2588 return skb->len ? 
: err; 2589 } 2590 2591 return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, 2592 _ipmr_fill_mroute, &mfc_unres_lock, &filter); 2593 } 2594 2595 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2596 [RTA_SRC] = { .type = NLA_U32 }, 2597 [RTA_DST] = { .type = NLA_U32 }, 2598 [RTA_IIF] = { .type = NLA_U32 }, 2599 [RTA_TABLE] = { .type = NLA_U32 }, 2600 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2601 }; 2602 2603 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2604 { 2605 switch (rtm_protocol) { 2606 case RTPROT_STATIC: 2607 case RTPROT_MROUTED: 2608 return true; 2609 } 2610 return false; 2611 } 2612 2613 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2614 { 2615 struct rtnexthop *rtnh = nla_data(nla); 2616 int remaining = nla_len(nla), vifi = 0; 2617 2618 while (rtnh_ok(rtnh, remaining)) { 2619 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2620 if (++vifi == MAXVIFS) 2621 break; 2622 rtnh = rtnh_next(rtnh, &remaining); 2623 } 2624 2625 return remaining > 0 ? -EINVAL : vifi; 2626 } 2627 2628 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2629 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2630 struct mfcctl *mfcc, int *mrtsock, 2631 struct mr_table **mrtret, 2632 struct netlink_ext_ack *extack) 2633 { 2634 struct net_device *dev = NULL; 2635 u32 tblid = RT_TABLE_DEFAULT; 2636 struct mr_table *mrt; 2637 struct nlattr *attr; 2638 struct rtmsg *rtm; 2639 int ret, rem; 2640 2641 ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, 2642 rtm_ipmr_policy, extack); 2643 if (ret < 0) 2644 goto out; 2645 rtm = nlmsg_data(nlh); 2646 2647 ret = -EINVAL; 2648 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2649 rtm->rtm_type != RTN_MULTICAST || 2650 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2651 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2652 goto out; 2653 2654 memset(mfcc, 0, sizeof(*mfcc)); 2655 mfcc->mfcc_parent = -1; 2656 ret = 0; 2657 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2658 switch (nla_type(attr)) { 2659 case RTA_SRC: 2660 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2661 break; 2662 case RTA_DST: 2663 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2664 break; 2665 case RTA_IIF: 2666 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2667 if (!dev) { 2668 ret = -ENODEV; 2669 goto out; 2670 } 2671 break; 2672 case RTA_MULTIPATH: 2673 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2674 ret = -EINVAL; 2675 goto out; 2676 } 2677 break; 2678 case RTA_PREFSRC: 2679 ret = 1; 2680 break; 2681 case RTA_TABLE: 2682 tblid = nla_get_u32(attr); 2683 break; 2684 } 2685 } 2686 mrt = ipmr_get_table(net, tblid); 2687 if (!mrt) { 2688 ret = -ENOENT; 2689 goto out; 2690 } 2691 *mrtret = mrt; 2692 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2693 if (dev) 2694 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2695 2696 out: 2697 return ret; 2698 } 2699 2700 /* takes care of both newroute and delroute */ 2701 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2702 struct netlink_ext_ack *extack) 2703 { 2704 struct net *net = sock_net(skb->sk); 2705 int ret, mrtsock, parent; 2706 struct mr_table *tbl; 2707 struct mfcctl mfcc; 2708 2709 mrtsock = 0; 2710 tbl = NULL; 2711 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2712 if (ret < 0) 2713 return ret; 2714 2715 parent = ret ? 
mfcc.mfcc_parent : -1; 2716 if (nlh->nlmsg_type == RTM_NEWROUTE) 2717 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2718 else 2719 return ipmr_mfc_delete(tbl, &mfcc, parent); 2720 } 2721 2722 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2723 { 2724 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2725 2726 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2727 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2728 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2729 mrt->mroute_reg_vif_num) || 2730 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2731 mrt->mroute_do_assert) || 2732 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) || 2733 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, 2734 mrt->mroute_do_wrvifwhole)) 2735 return false; 2736 2737 return true; 2738 } 2739 2740 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2741 { 2742 struct nlattr *vif_nest; 2743 struct vif_device *vif; 2744 2745 /* if the VIF doesn't exist just continue */ 2746 if (!VIF_EXISTS(mrt, vifid)) 2747 return true; 2748 2749 vif = &mrt->vif_table[vifid]; 2750 vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); 2751 if (!vif_nest) 2752 return false; 2753 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2754 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2755 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2756 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2757 IPMRA_VIFA_PAD) || 2758 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2759 IPMRA_VIFA_PAD) || 2760 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2761 IPMRA_VIFA_PAD) || 2762 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2763 IPMRA_VIFA_PAD) || 2764 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2765 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2766 nla_nest_cancel(skb, vif_nest); 2767 return false; 2768 } 2769 nla_nest_end(skb, vif_nest); 2770 2771 return true; 2772 } 2773 2774 static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, 2775 struct netlink_ext_ack *extack) 2776 { 2777 struct ifinfomsg *ifm; 2778 2779 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { 2780 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); 2781 return -EINVAL; 2782 } 2783 2784 if (nlmsg_attrlen(nlh, sizeof(*ifm))) { 2785 NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump"); 2786 return -EINVAL; 2787 } 2788 2789 ifm = nlmsg_data(nlh); 2790 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || 2791 ifm->ifi_change || ifm->ifi_index) { 2792 NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); 2793 return -EINVAL; 2794 } 2795 2796 return 0; 2797 } 2798 2799 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2800 { 2801 struct net *net = sock_net(skb->sk); 2802 struct nlmsghdr *nlh = NULL; 2803 unsigned int t = 0, s_t; 2804 unsigned int e = 0, s_e; 2805 struct mr_table *mrt; 2806 2807 if (cb->strict_check) { 2808 int err = ipmr_valid_dumplink(cb->nlh, cb->extack); 2809 2810 if (err < 0) 2811 return err; 2812 } 2813 2814 s_t = cb->args[0]; 2815 s_e = cb->args[1]; 2816 2817 ipmr_for_each_table(mrt, net) { 2818 struct nlattr *vifs, *af; 2819 struct ifinfomsg *hdr; 2820 u32 i; 2821 2822 if (t < s_t) 2823 goto skip_table; 2824 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2825 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2826 sizeof(*hdr), NLM_F_MULTI); 2827 if (!nlh) 2828 break; 2829 2830 hdr = nlmsg_data(nlh); 
2831 memset(hdr, 0, sizeof(*hdr)); 2832 hdr->ifi_family = RTNL_FAMILY_IPMR; 2833 2834 af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); 2835 if (!af) { 2836 nlmsg_cancel(skb, nlh); 2837 goto out; 2838 } 2839 2840 if (!ipmr_fill_table(mrt, skb)) { 2841 nlmsg_cancel(skb, nlh); 2842 goto out; 2843 } 2844 2845 vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); 2846 if (!vifs) { 2847 nla_nest_end(skb, af); 2848 nlmsg_end(skb, nlh); 2849 goto out; 2850 } 2851 for (i = 0; i < mrt->maxvif; i++) { 2852 if (e < s_e) 2853 goto skip_entry; 2854 if (!ipmr_fill_vif(mrt, i, skb)) { 2855 nla_nest_end(skb, vifs); 2856 nla_nest_end(skb, af); 2857 nlmsg_end(skb, nlh); 2858 goto out; 2859 } 2860 skip_entry: 2861 e++; 2862 } 2863 s_e = 0; 2864 e = 0; 2865 nla_nest_end(skb, vifs); 2866 nla_nest_end(skb, af); 2867 nlmsg_end(skb, nlh); 2868 skip_table: 2869 t++; 2870 } 2871 2872 out: 2873 cb->args[1] = e; 2874 cb->args[0] = t; 2875 2876 return skb->len; 2877 } 2878 2879 #ifdef CONFIG_PROC_FS 2880 /* The /proc interfaces to multicast routing : 2881 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2882 */ 2883 2884 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2885 __acquires(mrt_lock) 2886 { 2887 struct mr_vif_iter *iter = seq->private; 2888 struct net *net = seq_file_net(seq); 2889 struct mr_table *mrt; 2890 2891 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2892 if (!mrt) 2893 return ERR_PTR(-ENOENT); 2894 2895 iter->mrt = mrt; 2896 2897 read_lock(&mrt_lock); 2898 return mr_vif_seq_start(seq, pos); 2899 } 2900 2901 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 2902 __releases(mrt_lock) 2903 { 2904 read_unlock(&mrt_lock); 2905 } 2906 2907 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2908 { 2909 struct mr_vif_iter *iter = seq->private; 2910 struct mr_table *mrt = iter->mrt; 2911 2912 if (v == SEQ_START_TOKEN) { 2913 seq_puts(seq, 2914 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 2915 } else { 2916 const struct vif_device *vif = v; 2917 const char *name = vif->dev ? 
2918 vif->dev->name : "none"; 2919 2920 seq_printf(seq, 2921 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2922 vif - mrt->vif_table, 2923 name, vif->bytes_in, vif->pkt_in, 2924 vif->bytes_out, vif->pkt_out, 2925 vif->flags, vif->local, vif->remote); 2926 } 2927 return 0; 2928 } 2929 2930 static const struct seq_operations ipmr_vif_seq_ops = { 2931 .start = ipmr_vif_seq_start, 2932 .next = mr_vif_seq_next, 2933 .stop = ipmr_vif_seq_stop, 2934 .show = ipmr_vif_seq_show, 2935 }; 2936 2937 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 2938 { 2939 struct net *net = seq_file_net(seq); 2940 struct mr_table *mrt; 2941 2942 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2943 if (!mrt) 2944 return ERR_PTR(-ENOENT); 2945 2946 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 2947 } 2948 2949 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2950 { 2951 int n; 2952 2953 if (v == SEQ_START_TOKEN) { 2954 seq_puts(seq, 2955 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 2956 } else { 2957 const struct mfc_cache *mfc = v; 2958 const struct mr_mfc_iter *it = seq->private; 2959 const struct mr_table *mrt = it->mrt; 2960 2961 seq_printf(seq, "%08X %08X %-3hd", 2962 (__force u32) mfc->mfc_mcastgrp, 2963 (__force u32) mfc->mfc_origin, 2964 mfc->_c.mfc_parent); 2965 2966 if (it->cache != &mrt->mfc_unres_queue) { 2967 seq_printf(seq, " %8lu %8lu %8lu", 2968 mfc->_c.mfc_un.res.pkt, 2969 mfc->_c.mfc_un.res.bytes, 2970 mfc->_c.mfc_un.res.wrong_if); 2971 for (n = mfc->_c.mfc_un.res.minvif; 2972 n < mfc->_c.mfc_un.res.maxvif; n++) { 2973 if (VIF_EXISTS(mrt, n) && 2974 mfc->_c.mfc_un.res.ttls[n] < 255) 2975 seq_printf(seq, 2976 " %2d:%-3d", 2977 n, mfc->_c.mfc_un.res.ttls[n]); 2978 } 2979 } else { 2980 /* unresolved mfc_caches don't contain 2981 * pkt, bytes and wrong_if values 2982 */ 2983 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 2984 } 2985 seq_putc(seq, '\n'); 2986 } 2987 return 0; 2988 } 2989 2990 static const struct seq_operations ipmr_mfc_seq_ops = { 2991 .start = ipmr_mfc_seq_start, 2992 .next = mr_mfc_seq_next, 2993 .stop = mr_mfc_seq_stop, 2994 .show = ipmr_mfc_seq_show, 2995 }; 2996 #endif 2997 2998 #ifdef CONFIG_IP_PIMSM_V2 2999 static const struct net_protocol pim_protocol = { 3000 .handler = pim_rcv, 3001 .netns_ok = 1, 3002 }; 3003 #endif 3004 3005 static unsigned int ipmr_seq_read(struct net *net) 3006 { 3007 ASSERT_RTNL(); 3008 3009 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); 3010 } 3011 3012 static int ipmr_dump(struct net *net, struct notifier_block *nb, 3013 struct netlink_ext_ack *extack) 3014 { 3015 return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, 3016 ipmr_mr_table_iter, &mrt_lock, extack); 3017 } 3018 3019 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 3020 .family = RTNL_FAMILY_IPMR, 3021 .fib_seq_read = ipmr_seq_read, 3022 .fib_dump = ipmr_dump, 3023 .owner = THIS_MODULE, 3024 }; 3025 3026 static int __net_init ipmr_notifier_init(struct net *net) 3027 { 3028 struct fib_notifier_ops *ops; 3029 3030 net->ipv4.ipmr_seq = 0; 3031 3032 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3033 if (IS_ERR(ops)) 3034 return PTR_ERR(ops); 3035 net->ipv4.ipmr_notifier_ops = ops; 3036 3037 return 0; 3038 } 3039 3040 static void __net_exit ipmr_notifier_exit(struct net *net) 3041 { 3042 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3043 net->ipv4.ipmr_notifier_ops = NULL; 3044 } 3045 3046 /* Setup for IP multicast routing */ 3047 static int __net_init ipmr_net_init(struct net *net) 3048 { 
3049 int err; 3050 3051 err = ipmr_notifier_init(net); 3052 if (err) 3053 goto ipmr_notifier_fail; 3054 3055 err = ipmr_rules_init(net); 3056 if (err < 0) 3057 goto ipmr_rules_fail; 3058 3059 #ifdef CONFIG_PROC_FS 3060 err = -ENOMEM; 3061 if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, 3062 sizeof(struct mr_vif_iter))) 3063 goto proc_vif_fail; 3064 if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 3065 sizeof(struct mr_mfc_iter))) 3066 goto proc_cache_fail; 3067 #endif 3068 return 0; 3069 3070 #ifdef CONFIG_PROC_FS 3071 proc_cache_fail: 3072 remove_proc_entry("ip_mr_vif", net->proc_net); 3073 proc_vif_fail: 3074 ipmr_rules_exit(net); 3075 #endif 3076 ipmr_rules_fail: 3077 ipmr_notifier_exit(net); 3078 ipmr_notifier_fail: 3079 return err; 3080 } 3081 3082 static void __net_exit ipmr_net_exit(struct net *net) 3083 { 3084 #ifdef CONFIG_PROC_FS 3085 remove_proc_entry("ip_mr_cache", net->proc_net); 3086 remove_proc_entry("ip_mr_vif", net->proc_net); 3087 #endif 3088 ipmr_notifier_exit(net); 3089 ipmr_rules_exit(net); 3090 } 3091 3092 static struct pernet_operations ipmr_net_ops = { 3093 .init = ipmr_net_init, 3094 .exit = ipmr_net_exit, 3095 }; 3096 3097 int __init ip_mr_init(void) 3098 { 3099 int err; 3100 3101 mrt_cachep = kmem_cache_create("ip_mrt_cache", 3102 sizeof(struct mfc_cache), 3103 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 3104 NULL); 3105 3106 err = register_pernet_subsys(&ipmr_net_ops); 3107 if (err) 3108 goto reg_pernet_fail; 3109 3110 err = register_netdevice_notifier(&ip_mr_notifier); 3111 if (err) 3112 goto reg_notif_fail; 3113 #ifdef CONFIG_IP_PIMSM_V2 3114 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3115 pr_err("%s: can't add PIM protocol\n", __func__); 3116 err = -EAGAIN; 3117 goto add_proto_fail; 3118 } 3119 #endif 3120 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 3121 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); 3122 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, 3123 ipmr_rtm_route, NULL, 0); 3124 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, 3125 ipmr_rtm_route, NULL, 0); 3126 3127 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, 3128 NULL, ipmr_rtm_dumplink, 0); 3129 return 0; 3130 3131 #ifdef CONFIG_IP_PIMSM_V2 3132 add_proto_fail: 3133 unregister_netdevice_notifier(&ip_mr_notifier); 3134 #endif 3135 reg_notif_fail: 3136 unregister_pernet_subsys(&ipmr_net_ops); 3137 reg_pernet_fail: 3138 kmem_cache_destroy(mrt_cachep); 3139 return err; 3140 } 3141