// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>

#include <linux/nospec.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __ro_after_init;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv4.mr_tables))

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv4.mr_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv4.mr_tables)
		return NULL;
	return ret;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb,
			       struct netlink_ext_ack *extack)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	rtnl_lock();
	ipmr_free_table(mrt);
	rtnl_unlock();
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		ipmr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv4.mrt;
	return NULL;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv4.mrt = mrt;
	return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	rtnl_lock();
	ipmr_free_table(net->ipv4.mrt);
	net->ipv4.mrt = NULL;
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct mfc_cache_cmp_arg *cmparg = arg->key;
	struct mfc_cache *c = (struct mfc_cache *)ptr;

	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
	       cmparg->mfc_origin != c->mfc_origin;
}

static const struct rhashtable_params ipmr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc_cache, cmparg),
	.key_len = sizeof(struct mfc_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ipmr_hash_cmp,
	.automatic_shrinking = true,
};

static void ipmr_new_table_set(struct mr_table *mrt,
			       struct net *net)
{
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
}

static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
	.mfc_mcastgrp = htonl(INADDR_ANY),
	.mfc_origin = htonl(INADDR_ANY),
};

static struct mr_table_ops ipmr_mr_table_ops = {
	.rht_params = &ipmr_rht_params,
	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
};

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
		return ERR_PTR(-EINVAL);

	mrt = ipmr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
			      ipmr_expire_process, ipmr_new_table_set);
}

static void ipmr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return false;
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

	return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *tunnel_dev, *new_dev;
	struct ip_tunnel_parm p = { };
	int err;

	tunnel_dev = __dev_get_by_name(net, "tunl0");
	if (!tunnel_dev)
		goto out;

	p.iph.daddr = v->vifc_rmt_addr.s_addr;
	p.iph.saddr = v->vifc_lcl_addr.s_addr;
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	sprintf(p.name, "dvmrp%d", v->vifc_vifi);

	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
		goto out;
	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						     SIOCADDTUNNEL);
	if (err)
		goto out;

	new_dev = __dev_get_by_name(net, p.name);
	if (!new_dev)
		goto out;

	new_dev->flags |= IFF_MULTICAST;
	if (!ipmr_init_vif_indev(new_dev))
		goto out_unregister;
	if (dev_open(new_dev, NULL))
		goto out_unregister;
	dev_hold(new_dev);
	err = dev_set_allmulti(new_dev, 1);
	if (err) {
		dev_close(new_dev);
		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						       SIOCDELTUNNEL);
		dev_put(new_dev);
		new_dev = ERR_PTR(err);
	}
	return new_dev;

out_unregister:
	unregister_netdevice(new_dev);
out:
	return ERR_PTR(-ENOBUFS);
}

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	return NULL;
}
#endif

static int call_ipmr_vif_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct vif_device *vif,
					 vifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv4.ipmr_seq);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct mfc_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}

/**
 * vif_delete - Delete a VIF entry
 * @mrt: Table to delete from
 * @vifi: VIF identifier to delete
 * @notify: Set to 1, if the caller is a notifier_call
 * @head: if unregistering the VIF, place it on this queue
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
					      mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;

	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put_track(dev, &v->dev_tracker);
	return 0;
}

static void ipmr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}

static void ipmr_cache_free(struct mfc_cache *c)
{
	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = nlmsg_data(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			kfree_skb(skb);
		}
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
	struct mr_mfc *c, *next;
	unsigned long expires;
	unsigned long now;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ / 10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10 * HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	struct netdev_phys_item_id ppid = { };
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit,
			vifc->vifc_threshold,
			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
			(VIFF_TUNNEL | VIFF_REGISTER));

	err = dev_get_port_parent_id(dev, &ppid, true);
	if (err == 0) {
		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
		v->dev_parent_id.id_len = ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}

	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = htonl(INADDR_ANY)
	};

	if (mcastgrp == htonl(INADDR_ANY))
		return mr_mfc_find_any_parent(mrt, vifi);
	return mr_mfc_find_any(mrt, vifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->_c.mfc_un.res.minvif = MAXVIFS;
		c->_c.free = ipmr_cache_free_rcu;
		refcount_set(&c->_c.mfc_un.res.refcount, 1);
	}
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE) {
			msg->im_vif = vifi;
			msg->im_vif_hi = vifi >> 8;
		} else {
			msg->im_vif = mrt->mroute_reg_vif_num;
			msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
		}
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		msg->im_vif_hi = vifi >> 8;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		/* Add our header */
		igmp = skb_put(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		c = ipmr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);

		if (err < 0) {
			/* If the report failed throw the cache entry
			 * out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
	list_del_rcu(&c->_c.list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);

	return 0;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->_c.mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc_cache *)_uc;
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct net *net = read_pnet(&mrt->net);
	struct mr_mfc *c, *tmp;
	struct mfc_cache *cache;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
				continue;
			vif_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
			list_del_rcu(&c->list);
			cache = (struct mfc_cache *)c;
			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
						      mrt->id);
			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	if (flags & MRT_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				cache = (struct mfc_cache *)c;
				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
				ipmr_destroy_unres(mrt, cache);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
		}
	}
	rtnl_unlock();
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	bool do_wrvifwhole;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			/* We need to unlock here because mrtsock_destruct takes
			 * care of rtnl itself and we can't change that due to
			 * the IP_ROUTER_ALERT setsockopt which runs without it.
			 */
			rtnl_unlock();
			ret = ip_ra_control(sk, 0, NULL);
			goto out;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		if (parent == 0)
			parent = mfc.mfcc_parent;
		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);
		break;
	case MRT_FLUSH:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		mroute_clean_tables(mrt, val);
		break;
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		mrt->mroute_do_assert = val;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
		val = !!val;
		if (val != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = val;
			mrt->mroute_do_assert = val;
			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
			ret = -EFAULT;
			break;
		}

		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
out:
	return ret;
}

/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = mrt->mroute_do_pim;
		break;
	case MRT_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;
	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;
	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req {
	struct in_addr src;
	struct in_addr grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
	vifi_t	vifi;		/* Which iface */
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req sr;
	struct compat_sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, NULL);
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	const struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(net, skb, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset_ct(skb);
}

static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
				      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(net, sk, skb);
}

#ifdef CONFIG_NET_SWITCHDEV
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
	struct vif_device *in_vif = &mrt->vif_table[in_vifi];

	if (!skb->offload_l3_fwd_mark)
		return false;
	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
		return false;
	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
					&in_vif->dev_parent_id);
}
#else
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	return false;
}
#endif

/* Processing handlers for ipmr_forward */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    int in_vifi, struct sk_buff *skb, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	struct flowi4 fl4;
	int encap = 0;

	if (!vif->dev)
		goto out_free;

	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}

	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
		goto out_free;

	if (vif->flags & VIFF_TUNNEL) {
		rt = ip_route_output_ports(net, &fl4, NULL,
					   vif->remote, vif->local,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len + encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *c, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	int psend = -1;
	int vif, ct;

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * send through wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
1970 * It is bad, but otherwise we would need to move pretty 1971 * large chunk of pimd to kernel. Ough... --ANK 1972 */ 1973 (mrt->mroute_do_pim || 1974 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 1975 time_after(jiffies, 1976 c->_c.mfc_un.res.last_assert + 1977 MFC_ASSERT_THRESH)) { 1978 c->_c.mfc_un.res.last_assert = jiffies; 1979 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 1980 if (mrt->mroute_do_wrvifwhole) 1981 ipmr_cache_report(mrt, skb, true_vifi, 1982 IGMPMSG_WRVIFWHOLE); 1983 } 1984 goto dont_forward; 1985 } 1986 1987 forward: 1988 mrt->vif_table[vif].pkt_in++; 1989 mrt->vif_table[vif].bytes_in += skb->len; 1990 1991 /* Forward the frame */ 1992 if (c->mfc_origin == htonl(INADDR_ANY) && 1993 c->mfc_mcastgrp == htonl(INADDR_ANY)) { 1994 if (true_vifi >= 0 && 1995 true_vifi != c->_c.mfc_parent && 1996 ip_hdr(skb)->ttl > 1997 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 1998 /* It's an (*,*) entry and the packet is not coming from 1999 * the upstream: forward the packet to the upstream 2000 * only. 2001 */ 2002 psend = c->_c.mfc_parent; 2003 goto last_forward; 2004 } 2005 goto dont_forward; 2006 } 2007 for (ct = c->_c.mfc_un.res.maxvif - 1; 2008 ct >= c->_c.mfc_un.res.minvif; ct--) { 2009 /* For (*,G) entry, don't forward to the incoming interface */ 2010 if ((c->mfc_origin != htonl(INADDR_ANY) || 2011 ct != true_vifi) && 2012 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { 2013 if (psend != -1) { 2014 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2015 2016 if (skb2) 2017 ipmr_queue_xmit(net, mrt, true_vifi, 2018 skb2, psend); 2019 } 2020 psend = ct; 2021 } 2022 } 2023 last_forward: 2024 if (psend != -1) { 2025 if (local) { 2026 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2027 2028 if (skb2) 2029 ipmr_queue_xmit(net, mrt, true_vifi, skb2, 2030 psend); 2031 } else { 2032 ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); 2033 return; 2034 } 2035 } 2036 2037 dont_forward: 2038 if (!local) 2039 kfree_skb(skb); 2040 } 2041 2042 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2043 { 2044 struct rtable *rt = skb_rtable(skb); 2045 struct iphdr *iph = ip_hdr(skb); 2046 struct flowi4 fl4 = { 2047 .daddr = iph->daddr, 2048 .saddr = iph->saddr, 2049 .flowi4_tos = RT_TOS(iph->tos), 2050 .flowi4_oif = (rt_is_output_route(rt) ? 2051 skb->dev->ifindex : 0), 2052 .flowi4_iif = (rt_is_output_route(rt) ? 2053 LOOPBACK_IFINDEX : 2054 skb->dev->ifindex), 2055 .flowi4_mark = skb->mark, 2056 }; 2057 struct mr_table *mrt; 2058 int err; 2059 2060 err = ipmr_fib_lookup(net, &fl4, &mrt); 2061 if (err) 2062 return ERR_PTR(err); 2063 return mrt; 2064 } 2065 2066 /* Multicast packets for forwarding arrive here 2067 * Called with rcu_read_lock(); 2068 */ 2069 int ip_mr_input(struct sk_buff *skb) 2070 { 2071 struct mfc_cache *cache; 2072 struct net *net = dev_net(skb->dev); 2073 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2074 struct mr_table *mrt; 2075 struct net_device *dev; 2076 2077 /* skb->dev passed in is the loX master dev for vrfs. 2078 * As there are no vifs associated with loopback devices, 2079 * get the proper interface that does have a vif associated with it. 2080 */ 2081 dev = skb->dev; 2082 if (netif_is_l3_master(skb->dev)) { 2083 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2084 if (!dev) { 2085 kfree_skb(skb); 2086 return -ENODEV; 2087 } 2088 } 2089 2090 /* Packet is looped back after forward, it should not be 2091 * forwarded second time, but still can be delivered locally. 
static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph = ip_hdr(skb);
	struct flowi4 fl4 = {
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowi4_tos = RT_TOS(iph->tos),
		.flowi4_oif = (rt_is_output_route(rt) ?
			       skb->dev->ifindex : 0),
		.flowi4_iif = (rt_is_output_route(rt) ?
			       LOOPBACK_IFINDEX :
			       skb->dev->ifindex),
		.flowi4_mark = skb->mark,
	};
	struct mr_table *mrt;
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err)
		return ERR_PTR(err);
	return mrt;
}

/* Multicast packets for forwarding arrive here.
 * Called with rcu_read_lock().
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	struct net_device *dev;

	/* skb->dev passed in is the loX master dev for VRFs.
	 * As there are no vifs associated with loopback devices,
	 * get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	/* A packet that is looped back after forwarding must not be
	 * forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt)) {
		kfree_skb(skb);
		return PTR_ERR(mrt);
	}
	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			 * Cisco IOS <= 11.2(8)) do not put the router alert
			 * option into IGMP packets destined to routable
			 * groups. That is very bad, because it means we
			 * would forward no IGMP messages at all, so hand
			 * them to the mroute socket directly.
			 */
			struct sock *mroute_sk;

			mroute_sk = rcu_dereference(mrt->mroute_sk);
			if (mroute_sk) {
				nf_reset_ct(skb);
				raw_rcv(mroute_sk, skb);
				return 0;
			}
		}
	}

	/* already under rcu_read_lock() */
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
	if (!cache) {
		int vif = ipmr_find_vif(mrt, dev);

		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
						    vif);
	}

	/* No usable cache entry */
	if (!cache) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (!skb2)
				return -ENOBUFS;
			skb = skb2;
		}

		read_lock(&mrt_lock);
		vif = ipmr_find_vif(mrt, dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	read_lock(&mrt_lock);
	ip_mr_forward(net, mrt, dev, skb, cache, local);
	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
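
/* PIM register handlers: validate the PIM header, resolve the table for the
 * packet and hand it to __pim_rcv(), which delivers the encapsulated
 * multicast packet via the PIM register vif.
 */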
#ifdef CONFIG_IP_PIMSM_V1
/* Handle PIMv1 messages, which arrive as IGMP packets */
int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt))
		goto drop;
	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	mrt = ipmr_rt_fib_lookup(net, skb);
	if (IS_ERR(mrt))
		goto drop;
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

int ipmr_get_route(struct net *net, struct sk_buff *skb,
		   __be32 saddr, __be32 daddr,
		   struct rtmsg *rtm, u32 portid)
{
	struct mfc_cache *cache;
	struct mr_table *mrt;
	int err;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	rcu_read_lock();
	cache = ipmr_cache_find(mrt, saddr, daddr);
	if (!cache && skb->dev) {
		int vif = ipmr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ipmr_cache_find_any(mrt, daddr, vif);
	}
	if (!cache) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif = -1;

		dev = skb->dev;
		read_lock(&mrt_lock);
		if (dev)
			vif = ipmr_find_vif(mrt, dev);
		if (vif < 0) {
			read_unlock(&mrt_lock);
			rcu_read_unlock();
			return -ENODEV;
		}

		skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
		if (!skb2) {
			read_unlock(&mrt_lock);
			rcu_read_unlock();
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = saddr;
		iph->daddr = daddr;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
		read_unlock(&mrt_lock);
		rcu_read_unlock();
		return err;
	}

	read_lock(&mrt_lock);
	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	rcu_read_unlock();
	return err;
}
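
/* Fill an RTNL_FAMILY_IPMR rtmsg for one cache entry: the (S,G) pair as
 * RTA_SRC/RTA_DST, the table id, and the iif/oif list and counters via
 * mr_fill_mroute().
 */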
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
			    int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
			     int flags)
{
	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
				cmd, flags);
}

static size_t mroute_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(4)	/* RTA_SRC */
		+ nla_total_size(4)	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
				       mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}
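
/* Mirror an igmpmsg upcall (normally delivered on the mroute socket) as an
 * RTM_NEWCACHEREPORT notification on RTNLGRP_IPV4_MROUTE_R, carrying the
 * original packet in IPMRA_CREPORT_PKT.
 */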
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IPMRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
		+ nla_total_size(4)	/* IPMRA_CREPORT_TABLE */
					/* IPMRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct igmpmsg);
	msg = (struct igmpmsg *)skb_network_header(pkt);

	skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
	if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
	    nla_put_u32(skb, IPMRA_CREPORT_VIF_ID,
			msg->im_vif | (msg->im_vif_hi << 8)) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
			    msg->im_src.s_addr) ||
	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
			    msg->im_dst.s_addr) ||
	    nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id))
		goto nla_put_failure;

	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
}
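
/* Validate an RTM_GETROUTE request for the IPMR family; with strict
 * checking only RTA_SRC, RTA_DST and RTA_TABLE are accepted and the
 * prefix lengths must be 0 or 32.
 */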
static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_ipv4_policy, extack);

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_ipv4_policy, extack);
	if (err)
		return err;

	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
		return -EINVAL;
	}

	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_SRC:
		case RTA_DST:
		case RTA_TABLE:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
			return -EINVAL;
		}
	}

	return 0;
}

static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX + 1];
	struct sk_buff *skb = NULL;
	struct mfc_cache *cache;
	struct mr_table *mrt;
	__be32 src, grp;
	u32 tableid;
	int err;

	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;

	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
	if (!mrt) {
		err = -ENOENT;
		goto errout_free;
	}

	/* entries are added/deleted only under RTNL */
	rcu_read_lock();
	cache = ipmr_cache_find(mrt, src, grp);
	rcu_read_unlock();
	if (!cache) {
		err = -ENOENT;
		goto errout_free;
	}

	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout_free;
	}

	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
			       nlh->nlmsg_seq, cache,
			       RTM_NEWROUTE, 0);
	if (err < 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct fib_dump_filter filter = {};
	int err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
					    &filter, cb);
		if (err < 0)
			return err;
	}

	if (filter.table_id) {
		struct mr_table *mrt;

		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
		if (!mrt) {
			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
				return skb->len;

			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
			return -ENOENT;
		}
		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
				    &mfc_unres_lock, &filter);
		return skb->len ? : err;
	}

	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
}
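
/* RTM_NEWROUTE/RTM_DELROUTE support: translate the rtnetlink message into
 * the struct mfcctl used by the MRT_ADD_MFC/MRT_DEL_MFC socket options and
 * then add or delete the cache entry.
 */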
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
	[RTA_SRC]	= { .type = NLA_U32 },
	[RTA_DST]	= { .type = NLA_U32 },
	[RTA_IIF]	= { .type = NLA_U32 },
	[RTA_TABLE]	= { .type = NLA_U32 },
	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
};

static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
{
	switch (rtm_protocol) {
	case RTPROT_STATIC:
	case RTPROT_MROUTED:
		return true;
	}
	return false;
}

static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
{
	struct rtnexthop *rtnh = nla_data(nla);
	int remaining = nla_len(nla), vifi = 0;

	while (rtnh_ok(rtnh, remaining)) {
		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
		if (++vifi == MAXVIFS)
			break;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return remaining > 0 ? -EINVAL : vifi;
}

/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
			    struct mfcctl *mfcc, int *mrtsock,
			    struct mr_table **mrtret,
			    struct netlink_ext_ack *extack)
{
	struct net_device *dev = NULL;
	u32 tblid = RT_TABLE_DEFAULT;
	struct mr_table *mrt;
	struct nlattr *attr;
	struct rtmsg *rtm;
	int ret, rem;

	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
					rtm_ipmr_policy, extack);
	if (ret < 0)
		goto out;
	rtm = nlmsg_data(nlh);

	ret = -EINVAL;
	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
	    rtm->rtm_type != RTN_MULTICAST ||
	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
		goto out;

	memset(mfcc, 0, sizeof(*mfcc));
	mfcc->mfcc_parent = -1;
	ret = 0;
	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
		switch (nla_type(attr)) {
		case RTA_SRC:
			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
			break;
		case RTA_DST:
			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
			break;
		case RTA_IIF:
			dev = __dev_get_by_index(net, nla_get_u32(attr));
			if (!dev) {
				ret = -ENODEV;
				goto out;
			}
			break;
		case RTA_MULTIPATH:
			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case RTA_PREFSRC:
			ret = 1;
			break;
		case RTA_TABLE:
			tblid = nla_get_u32(attr);
			break;
		}
	}
	mrt = ipmr_get_table(net, tblid);
	if (!mrt) {
		ret = -ENOENT;
		goto out;
	}
	*mrtret = mrt;
	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
	if (dev)
		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);

out:
	return ret;
}

/* takes care of both newroute and delroute */
static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	int ret, mrtsock, parent;
	struct mr_table *tbl;
	struct mfcctl mfcc;

	mrtsock = 0;
	tbl = NULL;
	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
	if (ret < 0)
		return ret;

	parent = ret ? mfcc.mfcc_parent : -1;
	if (nlh->nlmsg_type == RTM_NEWROUTE)
		return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
	else
		return ipmr_mfc_delete(tbl, &mfcc, parent);
}
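
/* RTM_GETLINK dump support: expose every mr_table and its VIFs as an
 * RTNL_FAMILY_IPMR link message, with the table attributes and per-VIF
 * counters nested inside IFLA_AF_SPEC.
 */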
static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
{
	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);

	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
	    nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
			mrt->mroute_reg_vif_num) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
		       mrt->mroute_do_assert) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
		       mrt->mroute_do_wrvifwhole))
		return false;

	return true;
}

static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
{
	struct nlattr *vif_nest;
	struct vif_device *vif;

	/* if the VIF doesn't exist just continue */
	if (!VIF_EXISTS(mrt, vifid))
		return true;

	vif = &mrt->vif_table[vifid];
	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
	if (!vif_nest)
		return false;
	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
	    nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
	    nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
			      IPMRA_VIFA_PAD) ||
	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
			      IPMRA_VIFA_PAD) ||
	    nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
	    nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
		nla_nest_cancel(skb, vif_nest);
		return false;
	}
	nla_nest_end(skb, vif_nest);

	return true;
}

static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);
	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
	    ifm->ifi_change || ifm->ifi_index) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
		return -EINVAL;
	}

	return 0;
}

static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlmsghdr *nlh = NULL;
	unsigned int t = 0, s_t;
	unsigned int e = 0, s_e;
	struct mr_table *mrt;

	if (cb->strict_check) {
		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);

		if (err < 0)
			return err;
	}

	s_t = cb->args[0];
	s_e = cb->args[1];

	ipmr_for_each_table(mrt, net) {
		struct nlattr *vifs, *af;
		struct ifinfomsg *hdr;
		u32 i;

		if (t < s_t)
			goto skip_table;
		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, RTM_NEWLINK,
				sizeof(*hdr), NLM_F_MULTI);
		if (!nlh)
			break;

		hdr = nlmsg_data(nlh);
		memset(hdr, 0, sizeof(*hdr));
		hdr->ifi_family = RTNL_FAMILY_IPMR;

		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
		if (!af) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}

		if (!ipmr_fill_table(mrt, skb)) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}

		vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
		if (!vifs) {
			nla_nest_end(skb, af);
			nlmsg_end(skb, nlh);
			goto out;
		}
		for (i = 0; i < mrt->maxvif; i++) {
			if (e < s_e)
				goto skip_entry;
			if (!ipmr_fill_vif(mrt, i, skb)) {
				nla_nest_end(skb, vifs);
				nla_nest_end(skb, af);
				nlmsg_end(skb, nlh);
				goto out;
			}
skip_entry:
			e++;
		}
		s_e = 0;
		e = 0;
		nla_nest_end(skb, vifs);
		nla_nest_end(skb, af);
		nlmsg_end(skb, nlh);
skip_table:
		t++;
	}

out:
	cb->args[1] = e;
	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing :
 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
 */
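
/* ip_mr_vif prints one line per vif with its packet/byte counters and the
 * local/remote addresses in hex; ip_mr_cache prints one line per (S,G)
 * entry with its iif, counters, and a "vif:ttl" pair for each output
 * interface of a resolved entry.
 */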

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ?
				   vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
};
#endif

static unsigned int ipmr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
}

static int ipmr_dump(struct net *net, struct notifier_block *nb,
		     struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
		       ipmr_mr_table_iter, &mrt_lock, extack);
}

static const struct fib_notifier_ops ipmr_notifier_ops_template = {
	.family = RTNL_FAMILY_IPMR,
	.fib_seq_read = ipmr_seq_read,
	.fib_dump = ipmr_dump,
	.owner = THIS_MODULE,
};

static int __net_init ipmr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv4.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	net->ipv4.ipmr_notifier_ops = ops;

	return 0;
}

static void __net_exit ipmr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
	net->ipv4.ipmr_notifier_ops = NULL;
}
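
/* Per-netns state: ipmr_net_init() registers the FIB notifier ops, sets up
 * the multicast routing rules and default table, and creates the /proc
 * entries; the error labels undo only the steps that already succeeded.
 */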

/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_notifier_init(net);
	if (err)
		goto ipmr_notifier_fail;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto ipmr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
			     sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			     sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
ipmr_rules_fail:
	ipmr_notifier_exit(net);
ipmr_notifier_fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_mr_cache", net->proc_net);
	remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
	ipmr_notifier_exit(net);
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
		      ipmr_rtm_route, NULL, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
		      ipmr_rtm_route, NULL, 0);

	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
		      NULL, ipmr_rtm_dumplink, 0);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}