1 /* 2 * IP multicast routing support for mrouted 3.6/3.8 3 * 4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> 5 * Linux Consultancy and Custom Driver Development 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Fixes: 13 * Michael Chastain : Incorrect size of copying. 14 * Alan Cox : Added the cache manager code 15 * Alan Cox : Fixed the clone/copy bug and device race. 16 * Mike McLagan : Routing by source 17 * Malcolm Beattie : Buffer handling fixes. 18 * Alexey Kuznetsov : Double buffer free and other fixes. 19 * SVR Anand : Fixed several multicast bugs and problems. 20 * Alexey Kuznetsov : Status, optimisations and more. 21 * Brad Parker : Better behaviour on mrouted upcall 22 * overflow. 23 * Carlos Picoto : PIMv1 Support 24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 25 * Relax this requirement to work with older peers. 26 * 27 */ 28 29 #include <asm/system.h> 30 #include <asm/uaccess.h> 31 #include <linux/types.h> 32 #include <linux/capability.h> 33 #include <linux/errno.h> 34 #include <linux/timer.h> 35 #include <linux/mm.h> 36 #include <linux/kernel.h> 37 #include <linux/fcntl.h> 38 #include <linux/stat.h> 39 #include <linux/socket.h> 40 #include <linux/in.h> 41 #include <linux/inet.h> 42 #include <linux/netdevice.h> 43 #include <linux/inetdevice.h> 44 #include <linux/igmp.h> 45 #include <linux/proc_fs.h> 46 #include <linux/seq_file.h> 47 #include <linux/mroute.h> 48 #include <linux/init.h> 49 #include <linux/if_ether.h> 50 #include <linux/slab.h> 51 #include <net/net_namespace.h> 52 #include <net/ip.h> 53 #include <net/protocol.h> 54 #include <linux/skbuff.h> 55 #include <net/route.h> 56 #include <net/sock.h> 57 #include <net/icmp.h> 58 #include <net/udp.h> 59 #include <net/raw.h> 60 #include <linux/notifier.h> 61 #include <linux/if_arp.h> 62 #include <linux/netfilter_ipv4.h> 63 #include <net/ipip.h> 64 #include <net/checksum.h> 65 #include <net/netlink.h> 66 #include <net/fib_rules.h> 67 68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 69 #define CONFIG_IP_PIMSM 1 70 #endif 71 72 struct mr_table { 73 struct list_head list; 74 #ifdef CONFIG_NET_NS 75 struct net *net; 76 #endif 77 u32 id; 78 struct sock *mroute_sk; 79 struct timer_list ipmr_expire_timer; 80 struct list_head mfc_unres_queue; 81 struct list_head mfc_cache_array[MFC_LINES]; 82 struct vif_device vif_table[MAXVIFS]; 83 int maxvif; 84 atomic_t cache_resolve_queue_len; 85 int mroute_do_assert; 86 int mroute_do_pim; 87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 88 int mroute_reg_vif_num; 89 #endif 90 }; 91 92 struct ipmr_rule { 93 struct fib_rule common; 94 }; 95 96 struct ipmr_result { 97 struct mr_table *mrt; 98 }; 99 100 /* Big lock, protecting vif table, mrt cache and mroute socket state. 101 Note that the changes are semaphored via rtnl_lock. 102 */ 103 104 static DEFINE_RWLOCK(mrt_lock); 105 106 /* 107 * Multicast router control variables 108 */ 109 110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) 111 112 /* Special spinlock for queue of unresolved entries */ 113 static DEFINE_SPINLOCK(mfc_unres_lock); 114 115 /* We return to original Alan's scheme. Hash table of resolved 116 entries is changed only in process context and protected 117 with weak lock mrt_lock. Queue of unresolved entries is protected 118 with strong spinlock mfc_unres_lock. 119 120 In this case data path is free of exclusive locks at all. 121 */ 122 123 static struct kmem_cache *mrt_cachep __read_mostly; 124 125 static struct mr_table *ipmr_new_table(struct net *net, u32 id); 126 static int ip_mr_forward(struct net *net, struct mr_table *mrt, 127 struct sk_buff *skb, struct mfc_cache *cache, 128 int local); 129 static int ipmr_cache_report(struct mr_table *mrt, 130 struct sk_buff *pkt, vifi_t vifi, int assert); 131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 132 struct mfc_cache *c, struct rtmsg *rtm); 133 static void ipmr_expire_process(unsigned long arg); 134 135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 136 #define ipmr_for_each_table(mrt, net) \ 137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) 138 139 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 140 { 141 struct mr_table *mrt; 142 143 ipmr_for_each_table(mrt, net) { 144 if (mrt->id == id) 145 return mrt; 146 } 147 return NULL; 148 } 149 150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 151 struct mr_table **mrt) 152 { 153 struct ipmr_result res; 154 struct fib_lookup_arg arg = { .result = &res, }; 155 int err; 156 157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); 158 if (err < 0) 159 return err; 160 *mrt = res.mrt; 161 return 0; 162 } 163 164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, 165 int flags, struct fib_lookup_arg *arg) 166 { 167 struct ipmr_result *res = arg->result; 168 struct mr_table *mrt; 169 170 switch (rule->action) { 171 case FR_ACT_TO_TBL: 172 break; 173 case FR_ACT_UNREACHABLE: 174 return -ENETUNREACH; 175 case FR_ACT_PROHIBIT: 176 return -EACCES; 177 case FR_ACT_BLACKHOLE: 178 default: 179 return -EINVAL; 180 } 181 182 mrt = ipmr_get_table(rule->fr_net, rule->table); 183 if (mrt == NULL) 184 return -EAGAIN; 185 res->mrt = mrt; 186 return 0; 187 } 188 189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 190 { 191 return 1; 192 } 193 194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { 195 FRA_GENERIC_POLICY, 196 }; 197 198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 199 struct fib_rule_hdr *frh, struct nlattr **tb) 200 { 201 return 0; 202 } 203 204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 205 struct nlattr **tb) 206 { 207 return 1; 208 } 209 210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 211 struct fib_rule_hdr *frh) 212 { 213 frh->dst_len = 0; 214 frh->src_len = 0; 215 frh->tos = 0; 216 return 0; 217 } 218 219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { 220 .family = RTNL_FAMILY_IPMR, 221 .rule_size = sizeof(struct ipmr_rule), 222 .addr_size = sizeof(u32), 223 .action = ipmr_rule_action, 224 .match = ipmr_rule_match, 225 .configure = ipmr_rule_configure, 226 .compare = ipmr_rule_compare, 227 .default_pref = fib_default_rule_pref, 228 .fill = ipmr_rule_fill, 229 .nlgroup = RTNLGRP_IPV4_RULE, 230 .policy = ipmr_rule_policy, 231 .owner = THIS_MODULE, 232 }; 233 234 static int __net_init ipmr_rules_init(struct net *net) 235 { 236 struct fib_rules_ops *ops; 237 struct mr_table *mrt; 238 int err; 239 240 ops = fib_rules_register(&ipmr_rules_ops_template, net); 241 if (IS_ERR(ops)) 242 return PTR_ERR(ops); 243 244 INIT_LIST_HEAD(&net->ipv4.mr_tables); 245 246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 247 if (mrt == NULL) { 248 err = -ENOMEM; 249 goto err1; 250 } 251 252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 253 if (err < 0) 254 goto err2; 255 256 net->ipv4.mr_rules_ops = ops; 257 return 0; 258 259 err2: 260 kfree(mrt); 261 err1: 262 fib_rules_unregister(ops); 263 return err; 264 } 265 266 static void __net_exit ipmr_rules_exit(struct net *net) 267 { 268 struct mr_table *mrt, *next; 269 270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 271 list_del(&mrt->list); 272 kfree(mrt); 273 } 274 fib_rules_unregister(net->ipv4.mr_rules_ops); 275 } 276 #else 277 #define ipmr_for_each_table(mrt, net) \ 278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 279 280 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 281 { 282 return net->ipv4.mrt; 283 } 284 285 static int ipmr_fib_lookup(struct net *net, struct flowi *flp, 286 struct mr_table **mrt) 287 { 288 *mrt = net->ipv4.mrt; 289 return 0; 290 } 291 292 static int __net_init ipmr_rules_init(struct net *net) 293 { 294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 295 return net->ipv4.mrt ? 0 : -ENOMEM; 296 } 297 298 static void __net_exit ipmr_rules_exit(struct net *net) 299 { 300 kfree(net->ipv4.mrt); 301 } 302 #endif 303 304 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 305 { 306 struct mr_table *mrt; 307 unsigned int i; 308 309 mrt = ipmr_get_table(net, id); 310 if (mrt != NULL) 311 return mrt; 312 313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); 314 if (mrt == NULL) 315 return NULL; 316 write_pnet(&mrt->net, net); 317 mrt->id = id; 318 319 /* Forwarding cache */ 320 for (i = 0; i < MFC_LINES; i++) 321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); 322 323 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 324 325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 326 (unsigned long)mrt); 327 328 #ifdef CONFIG_IP_PIMSM 329 mrt->mroute_reg_vif_num = -1; 330 #endif 331 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 333 #endif 334 return mrt; 335 } 336 337 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 338 339 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 340 { 341 struct net *net = dev_net(dev); 342 343 dev_close(dev); 344 345 dev = __dev_get_by_name(net, "tunl0"); 346 if (dev) { 347 const struct net_device_ops *ops = dev->netdev_ops; 348 struct ifreq ifr; 349 struct ip_tunnel_parm p; 350 351 memset(&p, 0, sizeof(p)); 352 p.iph.daddr = v->vifc_rmt_addr.s_addr; 353 p.iph.saddr = v->vifc_lcl_addr.s_addr; 354 p.iph.version = 4; 355 p.iph.ihl = 5; 356 p.iph.protocol = IPPROTO_IPIP; 357 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 359 360 if (ops->ndo_do_ioctl) { 361 mm_segment_t oldfs = get_fs(); 362 363 set_fs(KERNEL_DS); 364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 365 set_fs(oldfs); 366 } 367 } 368 } 369 370 static 371 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 372 { 373 struct net_device *dev; 374 375 dev = __dev_get_by_name(net, "tunl0"); 376 377 if (dev) { 378 const struct net_device_ops *ops = dev->netdev_ops; 379 int err; 380 struct ifreq ifr; 381 struct ip_tunnel_parm p; 382 struct in_device *in_dev; 383 384 memset(&p, 0, sizeof(p)); 385 p.iph.daddr = v->vifc_rmt_addr.s_addr; 386 p.iph.saddr = v->vifc_lcl_addr.s_addr; 387 p.iph.version = 4; 388 p.iph.ihl = 5; 389 p.iph.protocol = IPPROTO_IPIP; 390 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 392 393 if (ops->ndo_do_ioctl) { 394 mm_segment_t oldfs = get_fs(); 395 396 set_fs(KERNEL_DS); 397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 398 set_fs(oldfs); 399 } else 400 err = -EOPNOTSUPP; 401 402 dev = NULL; 403 404 if (err == 0 && 405 (dev = __dev_get_by_name(net, p.name)) != NULL) { 406 dev->flags |= IFF_MULTICAST; 407 408 in_dev = __in_dev_get_rtnl(dev); 409 if (in_dev == NULL) 410 goto failure; 411 412 ipv4_devconf_setall(in_dev); 413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 414 415 if (dev_open(dev)) 416 goto failure; 417 dev_hold(dev); 418 } 419 } 420 return dev; 421 422 failure: 423 /* allow the register to be completed before unregistering. */ 424 rtnl_unlock(); 425 rtnl_lock(); 426 427 unregister_netdevice(dev); 428 return NULL; 429 } 430 431 #ifdef CONFIG_IP_PIMSM 432 433 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 434 { 435 struct net *net = dev_net(dev); 436 struct mr_table *mrt; 437 struct flowi fl = { 438 .oif = dev->ifindex, 439 .iif = skb->skb_iif, 440 .mark = skb->mark, 441 }; 442 int err; 443 444 err = ipmr_fib_lookup(net, &fl, &mrt); 445 if (err < 0) { 446 kfree_skb(skb); 447 return err; 448 } 449 450 read_lock(&mrt_lock); 451 dev->stats.tx_bytes += skb->len; 452 dev->stats.tx_packets++; 453 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); 454 read_unlock(&mrt_lock); 455 kfree_skb(skb); 456 return NETDEV_TX_OK; 457 } 458 459 static const struct net_device_ops reg_vif_netdev_ops = { 460 .ndo_start_xmit = reg_vif_xmit, 461 }; 462 463 static void reg_vif_setup(struct net_device *dev) 464 { 465 dev->type = ARPHRD_PIMREG; 466 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 467 dev->flags = IFF_NOARP; 468 dev->netdev_ops = ®_vif_netdev_ops, 469 dev->destructor = free_netdev; 470 dev->features |= NETIF_F_NETNS_LOCAL; 471 } 472 473 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 474 { 475 struct net_device *dev; 476 struct in_device *in_dev; 477 char name[IFNAMSIZ]; 478 479 if (mrt->id == RT_TABLE_DEFAULT) 480 sprintf(name, "pimreg"); 481 else 482 sprintf(name, "pimreg%u", mrt->id); 483 484 dev = alloc_netdev(0, name, reg_vif_setup); 485 486 if (dev == NULL) 487 return NULL; 488 489 dev_net_set(dev, net); 490 491 if (register_netdevice(dev)) { 492 free_netdev(dev); 493 return NULL; 494 } 495 dev->iflink = 0; 496 497 rcu_read_lock(); 498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { 499 rcu_read_unlock(); 500 goto failure; 501 } 502 503 ipv4_devconf_setall(in_dev); 504 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 505 rcu_read_unlock(); 506 507 if (dev_open(dev)) 508 goto failure; 509 510 dev_hold(dev); 511 512 return dev; 513 514 failure: 515 /* allow the register to be completed before unregistering. */ 516 rtnl_unlock(); 517 rtnl_lock(); 518 519 unregister_netdevice(dev); 520 return NULL; 521 } 522 #endif 523 524 /* 525 * Delete a VIF entry 526 * @notify: Set to 1, if the caller is a notifier_call 527 */ 528 529 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 530 struct list_head *head) 531 { 532 struct vif_device *v; 533 struct net_device *dev; 534 struct in_device *in_dev; 535 536 if (vifi < 0 || vifi >= mrt->maxvif) 537 return -EADDRNOTAVAIL; 538 539 v = &mrt->vif_table[vifi]; 540 541 write_lock_bh(&mrt_lock); 542 dev = v->dev; 543 v->dev = NULL; 544 545 if (!dev) { 546 write_unlock_bh(&mrt_lock); 547 return -EADDRNOTAVAIL; 548 } 549 550 #ifdef CONFIG_IP_PIMSM 551 if (vifi == mrt->mroute_reg_vif_num) 552 mrt->mroute_reg_vif_num = -1; 553 #endif 554 555 if (vifi+1 == mrt->maxvif) { 556 int tmp; 557 for (tmp=vifi-1; tmp>=0; tmp--) { 558 if (VIF_EXISTS(mrt, tmp)) 559 break; 560 } 561 mrt->maxvif = tmp+1; 562 } 563 564 write_unlock_bh(&mrt_lock); 565 566 dev_set_allmulti(dev, -1); 567 568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { 569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 570 ip_rt_multicast_event(in_dev); 571 } 572 573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 574 unregister_netdevice_queue(dev, head); 575 576 dev_put(dev); 577 return 0; 578 } 579 580 static inline void ipmr_cache_free(struct mfc_cache *c) 581 { 582 kmem_cache_free(mrt_cachep, c); 583 } 584 585 /* Destroy an unresolved cache entry, killing queued skbs 586 and reporting error to netlink readers. 587 */ 588 589 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 590 { 591 struct net *net = read_pnet(&mrt->net); 592 struct sk_buff *skb; 593 struct nlmsgerr *e; 594 595 atomic_dec(&mrt->cache_resolve_queue_len); 596 597 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 598 if (ip_hdr(skb)->version == 0) { 599 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 600 nlh->nlmsg_type = NLMSG_ERROR; 601 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 602 skb_trim(skb, nlh->nlmsg_len); 603 e = NLMSG_DATA(nlh); 604 e->error = -ETIMEDOUT; 605 memset(&e->msg, 0, sizeof(e->msg)); 606 607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 608 } else 609 kfree_skb(skb); 610 } 611 612 ipmr_cache_free(c); 613 } 614 615 616 /* Timer process for the unresolved queue. */ 617 618 static void ipmr_expire_process(unsigned long arg) 619 { 620 struct mr_table *mrt = (struct mr_table *)arg; 621 unsigned long now; 622 unsigned long expires; 623 struct mfc_cache *c, *next; 624 625 if (!spin_trylock(&mfc_unres_lock)) { 626 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 627 return; 628 } 629 630 if (list_empty(&mrt->mfc_unres_queue)) 631 goto out; 632 633 now = jiffies; 634 expires = 10*HZ; 635 636 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 637 if (time_after(c->mfc_un.unres.expires, now)) { 638 unsigned long interval = c->mfc_un.unres.expires - now; 639 if (interval < expires) 640 expires = interval; 641 continue; 642 } 643 644 list_del(&c->list); 645 ipmr_destroy_unres(mrt, c); 646 } 647 648 if (!list_empty(&mrt->mfc_unres_queue)) 649 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 650 651 out: 652 spin_unlock(&mfc_unres_lock); 653 } 654 655 /* Fill oifs list. It is called under write locked mrt_lock. */ 656 657 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, 658 unsigned char *ttls) 659 { 660 int vifi; 661 662 cache->mfc_un.res.minvif = MAXVIFS; 663 cache->mfc_un.res.maxvif = 0; 664 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 665 666 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 667 if (VIF_EXISTS(mrt, vifi) && 668 ttls[vifi] && ttls[vifi] < 255) { 669 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 670 if (cache->mfc_un.res.minvif > vifi) 671 cache->mfc_un.res.minvif = vifi; 672 if (cache->mfc_un.res.maxvif <= vifi) 673 cache->mfc_un.res.maxvif = vifi + 1; 674 } 675 } 676 } 677 678 static int vif_add(struct net *net, struct mr_table *mrt, 679 struct vifctl *vifc, int mrtsock) 680 { 681 int vifi = vifc->vifc_vifi; 682 struct vif_device *v = &mrt->vif_table[vifi]; 683 struct net_device *dev; 684 struct in_device *in_dev; 685 int err; 686 687 /* Is vif busy ? */ 688 if (VIF_EXISTS(mrt, vifi)) 689 return -EADDRINUSE; 690 691 switch (vifc->vifc_flags) { 692 #ifdef CONFIG_IP_PIMSM 693 case VIFF_REGISTER: 694 /* 695 * Special Purpose VIF in PIM 696 * All the packets will be sent to the daemon 697 */ 698 if (mrt->mroute_reg_vif_num >= 0) 699 return -EADDRINUSE; 700 dev = ipmr_reg_vif(net, mrt); 701 if (!dev) 702 return -ENOBUFS; 703 err = dev_set_allmulti(dev, 1); 704 if (err) { 705 unregister_netdevice(dev); 706 dev_put(dev); 707 return err; 708 } 709 break; 710 #endif 711 case VIFF_TUNNEL: 712 dev = ipmr_new_tunnel(net, vifc); 713 if (!dev) 714 return -ENOBUFS; 715 err = dev_set_allmulti(dev, 1); 716 if (err) { 717 ipmr_del_tunnel(dev, vifc); 718 dev_put(dev); 719 return err; 720 } 721 break; 722 723 case VIFF_USE_IFINDEX: 724 case 0: 725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 727 if (dev && dev->ip_ptr == NULL) { 728 dev_put(dev); 729 return -EADDRNOTAVAIL; 730 } 731 } else 732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 733 734 if (!dev) 735 return -EADDRNOTAVAIL; 736 err = dev_set_allmulti(dev, 1); 737 if (err) { 738 dev_put(dev); 739 return err; 740 } 741 break; 742 default: 743 return -EINVAL; 744 } 745 746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 747 dev_put(dev); 748 return -EADDRNOTAVAIL; 749 } 750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 751 ip_rt_multicast_event(in_dev); 752 753 /* 754 * Fill in the VIF structures 755 */ 756 v->rate_limit = vifc->vifc_rate_limit; 757 v->local = vifc->vifc_lcl_addr.s_addr; 758 v->remote = vifc->vifc_rmt_addr.s_addr; 759 v->flags = vifc->vifc_flags; 760 if (!mrtsock) 761 v->flags |= VIFF_STATIC; 762 v->threshold = vifc->vifc_threshold; 763 v->bytes_in = 0; 764 v->bytes_out = 0; 765 v->pkt_in = 0; 766 v->pkt_out = 0; 767 v->link = dev->ifindex; 768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 769 v->link = dev->iflink; 770 771 /* And finish update writing critical data */ 772 write_lock_bh(&mrt_lock); 773 v->dev = dev; 774 #ifdef CONFIG_IP_PIMSM 775 if (v->flags&VIFF_REGISTER) 776 mrt->mroute_reg_vif_num = vifi; 777 #endif 778 if (vifi+1 > mrt->maxvif) 779 mrt->maxvif = vifi+1; 780 write_unlock_bh(&mrt_lock); 781 return 0; 782 } 783 784 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 785 __be32 origin, 786 __be32 mcastgrp) 787 { 788 int line = MFC_HASH(mcastgrp, origin); 789 struct mfc_cache *c; 790 791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) 793 return c; 794 } 795 return NULL; 796 } 797 798 /* 799 * Allocate a multicast cache entry 800 */ 801 static struct mfc_cache *ipmr_cache_alloc(void) 802 { 803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 804 if (c == NULL) 805 return NULL; 806 c->mfc_un.res.minvif = MAXVIFS; 807 return c; 808 } 809 810 static struct mfc_cache *ipmr_cache_alloc_unres(void) 811 { 812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 813 if (c == NULL) 814 return NULL; 815 skb_queue_head_init(&c->mfc_un.unres.unresolved); 816 c->mfc_un.unres.expires = jiffies + 10*HZ; 817 return c; 818 } 819 820 /* 821 * A cache entry has gone into a resolved state from queued 822 */ 823 824 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 825 struct mfc_cache *uc, struct mfc_cache *c) 826 { 827 struct sk_buff *skb; 828 struct nlmsgerr *e; 829 830 /* 831 * Play the pending entries through our router 832 */ 833 834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 835 if (ip_hdr(skb)->version == 0) { 836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 837 838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 839 nlh->nlmsg_len = (skb_tail_pointer(skb) - 840 (u8 *)nlh); 841 } else { 842 nlh->nlmsg_type = NLMSG_ERROR; 843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 844 skb_trim(skb, nlh->nlmsg_len); 845 e = NLMSG_DATA(nlh); 846 e->error = -EMSGSIZE; 847 memset(&e->msg, 0, sizeof(e->msg)); 848 } 849 850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 851 } else 852 ip_mr_forward(net, mrt, skb, c, 0); 853 } 854 } 855 856 /* 857 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted 858 * expects the following bizarre scheme. 859 * 860 * Called under mrt_lock. 861 */ 862 863 static int ipmr_cache_report(struct mr_table *mrt, 864 struct sk_buff *pkt, vifi_t vifi, int assert) 865 { 866 struct sk_buff *skb; 867 const int ihl = ip_hdrlen(pkt); 868 struct igmphdr *igmp; 869 struct igmpmsg *msg; 870 int ret; 871 872 #ifdef CONFIG_IP_PIMSM 873 if (assert == IGMPMSG_WHOLEPKT) 874 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 875 else 876 #endif 877 skb = alloc_skb(128, GFP_ATOMIC); 878 879 if (!skb) 880 return -ENOBUFS; 881 882 #ifdef CONFIG_IP_PIMSM 883 if (assert == IGMPMSG_WHOLEPKT) { 884 /* Ugly, but we have no choice with this interface. 885 Duplicate old header, fix ihl, length etc. 886 And all this only to mangle msg->im_msgtype and 887 to set msg->im_mbz to "mbz" :-) 888 */ 889 skb_push(skb, sizeof(struct iphdr)); 890 skb_reset_network_header(skb); 891 skb_reset_transport_header(skb); 892 msg = (struct igmpmsg *)skb_network_header(skb); 893 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 894 msg->im_msgtype = IGMPMSG_WHOLEPKT; 895 msg->im_mbz = 0; 896 msg->im_vif = mrt->mroute_reg_vif_num; 897 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 898 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 899 sizeof(struct iphdr)); 900 } else 901 #endif 902 { 903 904 /* 905 * Copy the IP header 906 */ 907 908 skb->network_header = skb->tail; 909 skb_put(skb, ihl); 910 skb_copy_to_linear_data(skb, pkt->data, ihl); 911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 912 msg = (struct igmpmsg *)skb_network_header(skb); 913 msg->im_vif = vifi; 914 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 915 916 /* 917 * Add our header 918 */ 919 920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 921 igmp->type = 922 msg->im_msgtype = assert; 923 igmp->code = 0; 924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 925 skb->transport_header = skb->network_header; 926 } 927 928 if (mrt->mroute_sk == NULL) { 929 kfree_skb(skb); 930 return -EINVAL; 931 } 932 933 /* 934 * Deliver to mrouted 935 */ 936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); 937 if (ret < 0) { 938 if (net_ratelimit()) 939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 940 kfree_skb(skb); 941 } 942 943 return ret; 944 } 945 946 /* 947 * Queue a packet for resolution. It gets locked cache entry! 948 */ 949 950 static int 951 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) 952 { 953 bool found = false; 954 int err; 955 struct mfc_cache *c; 956 const struct iphdr *iph = ip_hdr(skb); 957 958 spin_lock_bh(&mfc_unres_lock); 959 list_for_each_entry(c, &mrt->mfc_unres_queue, list) { 960 if (c->mfc_mcastgrp == iph->daddr && 961 c->mfc_origin == iph->saddr) { 962 found = true; 963 break; 964 } 965 } 966 967 if (!found) { 968 /* 969 * Create a new entry if allowable 970 */ 971 972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 973 (c = ipmr_cache_alloc_unres()) == NULL) { 974 spin_unlock_bh(&mfc_unres_lock); 975 976 kfree_skb(skb); 977 return -ENOBUFS; 978 } 979 980 /* 981 * Fill in the new cache entry 982 */ 983 c->mfc_parent = -1; 984 c->mfc_origin = iph->saddr; 985 c->mfc_mcastgrp = iph->daddr; 986 987 /* 988 * Reflect first query at mrouted. 989 */ 990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 991 if (err < 0) { 992 /* If the report failed throw the cache entry 993 out - Brad Parker 994 */ 995 spin_unlock_bh(&mfc_unres_lock); 996 997 ipmr_cache_free(c); 998 kfree_skb(skb); 999 return err; 1000 } 1001 1002 atomic_inc(&mrt->cache_resolve_queue_len); 1003 list_add(&c->list, &mrt->mfc_unres_queue); 1004 1005 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1007 } 1008 1009 /* 1010 * See if we can append the packet 1011 */ 1012 if (c->mfc_un.unres.unresolved.qlen>3) { 1013 kfree_skb(skb); 1014 err = -ENOBUFS; 1015 } else { 1016 skb_queue_tail(&c->mfc_un.unres.unresolved, skb); 1017 err = 0; 1018 } 1019 1020 spin_unlock_bh(&mfc_unres_lock); 1021 return err; 1022 } 1023 1024 /* 1025 * MFC cache manipulation by user space mroute daemon 1026 */ 1027 1028 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) 1029 { 1030 int line; 1031 struct mfc_cache *c, *next; 1032 1033 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1034 1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { 1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1038 write_lock_bh(&mrt_lock); 1039 list_del(&c->list); 1040 write_unlock_bh(&mrt_lock); 1041 1042 ipmr_cache_free(c); 1043 return 0; 1044 } 1045 } 1046 return -ENOENT; 1047 } 1048 1049 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1050 struct mfcctl *mfc, int mrtsock) 1051 { 1052 bool found = false; 1053 int line; 1054 struct mfc_cache *uc, *c; 1055 1056 if (mfc->mfcc_parent >= MAXVIFS) 1057 return -ENFILE; 1058 1059 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1060 1061 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 1062 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1063 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1064 found = true; 1065 break; 1066 } 1067 } 1068 1069 if (found) { 1070 write_lock_bh(&mrt_lock); 1071 c->mfc_parent = mfc->mfcc_parent; 1072 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1073 if (!mrtsock) 1074 c->mfc_flags |= MFC_STATIC; 1075 write_unlock_bh(&mrt_lock); 1076 return 0; 1077 } 1078 1079 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1080 return -EINVAL; 1081 1082 c = ipmr_cache_alloc(); 1083 if (c == NULL) 1084 return -ENOMEM; 1085 1086 c->mfc_origin = mfc->mfcc_origin.s_addr; 1087 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1088 c->mfc_parent = mfc->mfcc_parent; 1089 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1090 if (!mrtsock) 1091 c->mfc_flags |= MFC_STATIC; 1092 1093 write_lock_bh(&mrt_lock); 1094 list_add(&c->list, &mrt->mfc_cache_array[line]); 1095 write_unlock_bh(&mrt_lock); 1096 1097 /* 1098 * Check to see if we resolved a queued list. If so we 1099 * need to send on the frames and tidy up. 1100 */ 1101 found = false; 1102 spin_lock_bh(&mfc_unres_lock); 1103 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { 1104 if (uc->mfc_origin == c->mfc_origin && 1105 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1106 list_del(&uc->list); 1107 atomic_dec(&mrt->cache_resolve_queue_len); 1108 found = true; 1109 break; 1110 } 1111 } 1112 if (list_empty(&mrt->mfc_unres_queue)) 1113 del_timer(&mrt->ipmr_expire_timer); 1114 spin_unlock_bh(&mfc_unres_lock); 1115 1116 if (found) { 1117 ipmr_cache_resolve(net, mrt, uc, c); 1118 ipmr_cache_free(uc); 1119 } 1120 return 0; 1121 } 1122 1123 /* 1124 * Close the multicast socket, and clear the vif tables etc 1125 */ 1126 1127 static void mroute_clean_tables(struct mr_table *mrt) 1128 { 1129 int i; 1130 LIST_HEAD(list); 1131 struct mfc_cache *c, *next; 1132 1133 /* 1134 * Shut down all active vif entries 1135 */ 1136 for (i = 0; i < mrt->maxvif; i++) { 1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC)) 1138 vif_delete(mrt, i, 0, &list); 1139 } 1140 unregister_netdevice_many(&list); 1141 1142 /* 1143 * Wipe the cache 1144 */ 1145 for (i = 0; i < MFC_LINES; i++) { 1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1147 if (c->mfc_flags&MFC_STATIC) 1148 continue; 1149 write_lock_bh(&mrt_lock); 1150 list_del(&c->list); 1151 write_unlock_bh(&mrt_lock); 1152 1153 ipmr_cache_free(c); 1154 } 1155 } 1156 1157 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1158 spin_lock_bh(&mfc_unres_lock); 1159 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 1160 list_del(&c->list); 1161 ipmr_destroy_unres(mrt, c); 1162 } 1163 spin_unlock_bh(&mfc_unres_lock); 1164 } 1165 } 1166 1167 static void mrtsock_destruct(struct sock *sk) 1168 { 1169 struct net *net = sock_net(sk); 1170 struct mr_table *mrt; 1171 1172 rtnl_lock(); 1173 ipmr_for_each_table(mrt, net) { 1174 if (sk == mrt->mroute_sk) { 1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1176 1177 write_lock_bh(&mrt_lock); 1178 mrt->mroute_sk = NULL; 1179 write_unlock_bh(&mrt_lock); 1180 1181 mroute_clean_tables(mrt); 1182 } 1183 } 1184 rtnl_unlock(); 1185 } 1186 1187 /* 1188 * Socket options and virtual interface manipulation. The whole 1189 * virtual interface system is a complete heap, but unfortunately 1190 * that's how BSD mrouted happens to think. Maybe one day with a proper 1191 * MOSPF/PIM router set up we can clean this up. 1192 */ 1193 1194 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) 1195 { 1196 int ret; 1197 struct vifctl vif; 1198 struct mfcctl mfc; 1199 struct net *net = sock_net(sk); 1200 struct mr_table *mrt; 1201 1202 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1203 if (mrt == NULL) 1204 return -ENOENT; 1205 1206 if (optname != MRT_INIT) { 1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) 1208 return -EACCES; 1209 } 1210 1211 switch (optname) { 1212 case MRT_INIT: 1213 if (sk->sk_type != SOCK_RAW || 1214 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1215 return -EOPNOTSUPP; 1216 if (optlen != sizeof(int)) 1217 return -ENOPROTOOPT; 1218 1219 rtnl_lock(); 1220 if (mrt->mroute_sk) { 1221 rtnl_unlock(); 1222 return -EADDRINUSE; 1223 } 1224 1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1226 if (ret == 0) { 1227 write_lock_bh(&mrt_lock); 1228 mrt->mroute_sk = sk; 1229 write_unlock_bh(&mrt_lock); 1230 1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1232 } 1233 rtnl_unlock(); 1234 return ret; 1235 case MRT_DONE: 1236 if (sk != mrt->mroute_sk) 1237 return -EACCES; 1238 return ip_ra_control(sk, 0, NULL); 1239 case MRT_ADD_VIF: 1240 case MRT_DEL_VIF: 1241 if (optlen != sizeof(vif)) 1242 return -EINVAL; 1243 if (copy_from_user(&vif, optval, sizeof(vif))) 1244 return -EFAULT; 1245 if (vif.vifc_vifi >= MAXVIFS) 1246 return -ENFILE; 1247 rtnl_lock(); 1248 if (optname == MRT_ADD_VIF) { 1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); 1250 } else { 1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1252 } 1253 rtnl_unlock(); 1254 return ret; 1255 1256 /* 1257 * Manipulate the forwarding caches. These live 1258 * in a sort of kernel/user symbiosis. 1259 */ 1260 case MRT_ADD_MFC: 1261 case MRT_DEL_MFC: 1262 if (optlen != sizeof(mfc)) 1263 return -EINVAL; 1264 if (copy_from_user(&mfc, optval, sizeof(mfc))) 1265 return -EFAULT; 1266 rtnl_lock(); 1267 if (optname == MRT_DEL_MFC) 1268 ret = ipmr_mfc_delete(mrt, &mfc); 1269 else 1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); 1271 rtnl_unlock(); 1272 return ret; 1273 /* 1274 * Control PIM assert. 1275 */ 1276 case MRT_ASSERT: 1277 { 1278 int v; 1279 if (get_user(v,(int __user *)optval)) 1280 return -EFAULT; 1281 mrt->mroute_do_assert = (v) ? 1 : 0; 1282 return 0; 1283 } 1284 #ifdef CONFIG_IP_PIMSM 1285 case MRT_PIM: 1286 { 1287 int v; 1288 1289 if (get_user(v,(int __user *)optval)) 1290 return -EFAULT; 1291 v = (v) ? 1 : 0; 1292 1293 rtnl_lock(); 1294 ret = 0; 1295 if (v != mrt->mroute_do_pim) { 1296 mrt->mroute_do_pim = v; 1297 mrt->mroute_do_assert = v; 1298 } 1299 rtnl_unlock(); 1300 return ret; 1301 } 1302 #endif 1303 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 1304 case MRT_TABLE: 1305 { 1306 u32 v; 1307 1308 if (optlen != sizeof(u32)) 1309 return -EINVAL; 1310 if (get_user(v, (u32 __user *)optval)) 1311 return -EFAULT; 1312 if (sk == mrt->mroute_sk) 1313 return -EBUSY; 1314 1315 rtnl_lock(); 1316 ret = 0; 1317 if (!ipmr_new_table(net, v)) 1318 ret = -ENOMEM; 1319 raw_sk(sk)->ipmr_table = v; 1320 rtnl_unlock(); 1321 return ret; 1322 } 1323 #endif 1324 /* 1325 * Spurious command, or MRT_VERSION which you cannot 1326 * set. 1327 */ 1328 default: 1329 return -ENOPROTOOPT; 1330 } 1331 } 1332 1333 /* 1334 * Getsock opt support for the multicast routing system. 1335 */ 1336 1337 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1338 { 1339 int olr; 1340 int val; 1341 struct net *net = sock_net(sk); 1342 struct mr_table *mrt; 1343 1344 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1345 if (mrt == NULL) 1346 return -ENOENT; 1347 1348 if (optname != MRT_VERSION && 1349 #ifdef CONFIG_IP_PIMSM 1350 optname!=MRT_PIM && 1351 #endif 1352 optname!=MRT_ASSERT) 1353 return -ENOPROTOOPT; 1354 1355 if (get_user(olr, optlen)) 1356 return -EFAULT; 1357 1358 olr = min_t(unsigned int, olr, sizeof(int)); 1359 if (olr < 0) 1360 return -EINVAL; 1361 1362 if (put_user(olr, optlen)) 1363 return -EFAULT; 1364 if (optname == MRT_VERSION) 1365 val = 0x0305; 1366 #ifdef CONFIG_IP_PIMSM 1367 else if (optname == MRT_PIM) 1368 val = mrt->mroute_do_pim; 1369 #endif 1370 else 1371 val = mrt->mroute_do_assert; 1372 if (copy_to_user(optval, &val, olr)) 1373 return -EFAULT; 1374 return 0; 1375 } 1376 1377 /* 1378 * The IP multicast ioctl support routines. 1379 */ 1380 1381 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1382 { 1383 struct sioc_sg_req sr; 1384 struct sioc_vif_req vr; 1385 struct vif_device *vif; 1386 struct mfc_cache *c; 1387 struct net *net = sock_net(sk); 1388 struct mr_table *mrt; 1389 1390 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1391 if (mrt == NULL) 1392 return -ENOENT; 1393 1394 switch (cmd) { 1395 case SIOCGETVIFCNT: 1396 if (copy_from_user(&vr, arg, sizeof(vr))) 1397 return -EFAULT; 1398 if (vr.vifi >= mrt->maxvif) 1399 return -EINVAL; 1400 read_lock(&mrt_lock); 1401 vif = &mrt->vif_table[vr.vifi]; 1402 if (VIF_EXISTS(mrt, vr.vifi)) { 1403 vr.icount = vif->pkt_in; 1404 vr.ocount = vif->pkt_out; 1405 vr.ibytes = vif->bytes_in; 1406 vr.obytes = vif->bytes_out; 1407 read_unlock(&mrt_lock); 1408 1409 if (copy_to_user(arg, &vr, sizeof(vr))) 1410 return -EFAULT; 1411 return 0; 1412 } 1413 read_unlock(&mrt_lock); 1414 return -EADDRNOTAVAIL; 1415 case SIOCGETSGCNT: 1416 if (copy_from_user(&sr, arg, sizeof(sr))) 1417 return -EFAULT; 1418 1419 read_lock(&mrt_lock); 1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1421 if (c) { 1422 sr.pktcnt = c->mfc_un.res.pkt; 1423 sr.bytecnt = c->mfc_un.res.bytes; 1424 sr.wrong_if = c->mfc_un.res.wrong_if; 1425 read_unlock(&mrt_lock); 1426 1427 if (copy_to_user(arg, &sr, sizeof(sr))) 1428 return -EFAULT; 1429 return 0; 1430 } 1431 read_unlock(&mrt_lock); 1432 return -EADDRNOTAVAIL; 1433 default: 1434 return -ENOIOCTLCMD; 1435 } 1436 } 1437 1438 1439 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1440 { 1441 struct net_device *dev = ptr; 1442 struct net *net = dev_net(dev); 1443 struct mr_table *mrt; 1444 struct vif_device *v; 1445 int ct; 1446 LIST_HEAD(list); 1447 1448 if (event != NETDEV_UNREGISTER) 1449 return NOTIFY_DONE; 1450 1451 ipmr_for_each_table(mrt, net) { 1452 v = &mrt->vif_table[0]; 1453 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1454 if (v->dev == dev) 1455 vif_delete(mrt, ct, 1, &list); 1456 } 1457 } 1458 unregister_netdevice_many(&list); 1459 return NOTIFY_DONE; 1460 } 1461 1462 1463 static struct notifier_block ip_mr_notifier = { 1464 .notifier_call = ipmr_device_event, 1465 }; 1466 1467 /* 1468 * Encapsulate a packet by attaching a valid IPIP header to it. 1469 * This avoids tunnel drivers and other mess and gives us the speed so 1470 * important for multicast video. 1471 */ 1472 1473 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1474 { 1475 struct iphdr *iph; 1476 struct iphdr *old_iph = ip_hdr(skb); 1477 1478 skb_push(skb, sizeof(struct iphdr)); 1479 skb->transport_header = skb->network_header; 1480 skb_reset_network_header(skb); 1481 iph = ip_hdr(skb); 1482 1483 iph->version = 4; 1484 iph->tos = old_iph->tos; 1485 iph->ttl = old_iph->ttl; 1486 iph->frag_off = 0; 1487 iph->daddr = daddr; 1488 iph->saddr = saddr; 1489 iph->protocol = IPPROTO_IPIP; 1490 iph->ihl = 5; 1491 iph->tot_len = htons(skb->len); 1492 ip_select_ident(iph, skb_dst(skb), NULL); 1493 ip_send_check(iph); 1494 1495 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1496 nf_reset(skb); 1497 } 1498 1499 static inline int ipmr_forward_finish(struct sk_buff *skb) 1500 { 1501 struct ip_options * opt = &(IPCB(skb)->opt); 1502 1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1504 1505 if (unlikely(opt->optlen)) 1506 ip_forward_options(skb); 1507 1508 return dst_output(skb); 1509 } 1510 1511 /* 1512 * Processing handlers for ipmr_forward 1513 */ 1514 1515 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1516 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1517 { 1518 const struct iphdr *iph = ip_hdr(skb); 1519 struct vif_device *vif = &mrt->vif_table[vifi]; 1520 struct net_device *dev; 1521 struct rtable *rt; 1522 int encap = 0; 1523 1524 if (vif->dev == NULL) 1525 goto out_free; 1526 1527 #ifdef CONFIG_IP_PIMSM 1528 if (vif->flags & VIFF_REGISTER) { 1529 vif->pkt_out++; 1530 vif->bytes_out += skb->len; 1531 vif->dev->stats.tx_bytes += skb->len; 1532 vif->dev->stats.tx_packets++; 1533 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1534 goto out_free; 1535 } 1536 #endif 1537 1538 if (vif->flags&VIFF_TUNNEL) { 1539 struct flowi fl = { .oif = vif->link, 1540 .nl_u = { .ip4_u = 1541 { .daddr = vif->remote, 1542 .saddr = vif->local, 1543 .tos = RT_TOS(iph->tos) } }, 1544 .proto = IPPROTO_IPIP }; 1545 if (ip_route_output_key(net, &rt, &fl)) 1546 goto out_free; 1547 encap = sizeof(struct iphdr); 1548 } else { 1549 struct flowi fl = { .oif = vif->link, 1550 .nl_u = { .ip4_u = 1551 { .daddr = iph->daddr, 1552 .tos = RT_TOS(iph->tos) } }, 1553 .proto = IPPROTO_IPIP }; 1554 if (ip_route_output_key(net, &rt, &fl)) 1555 goto out_free; 1556 } 1557 1558 dev = rt->dst.dev; 1559 1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1561 /* Do not fragment multicasts. Alas, IPv4 does not 1562 allow to send ICMP, so that packets will disappear 1563 to blackhole. 1564 */ 1565 1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1567 ip_rt_put(rt); 1568 goto out_free; 1569 } 1570 1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; 1572 1573 if (skb_cow(skb, encap)) { 1574 ip_rt_put(rt); 1575 goto out_free; 1576 } 1577 1578 vif->pkt_out++; 1579 vif->bytes_out += skb->len; 1580 1581 skb_dst_drop(skb); 1582 skb_dst_set(skb, &rt->dst); 1583 ip_decrease_ttl(ip_hdr(skb)); 1584 1585 /* FIXME: forward and output firewalls used to be called here. 1586 * What do we do with netfilter? -- RR */ 1587 if (vif->flags & VIFF_TUNNEL) { 1588 ip_encap(skb, vif->local, vif->remote); 1589 /* FIXME: extra output firewall step used to be here. --RR */ 1590 vif->dev->stats.tx_packets++; 1591 vif->dev->stats.tx_bytes += skb->len; 1592 } 1593 1594 IPCB(skb)->flags |= IPSKB_FORWARDED; 1595 1596 /* 1597 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1598 * not only before forwarding, but after forwarding on all output 1599 * interfaces. It is clear, if mrouter runs a multicasting 1600 * program, it should receive packets not depending to what interface 1601 * program is joined. 1602 * If we will not make it, the program will have to join on all 1603 * interfaces. On the other hand, multihoming host (or router, but 1604 * not mrouter) cannot join to more than one interface - it will 1605 * result in receiving multiple packets. 1606 */ 1607 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, 1608 ipmr_forward_finish); 1609 return; 1610 1611 out_free: 1612 kfree_skb(skb); 1613 } 1614 1615 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) 1616 { 1617 int ct; 1618 1619 for (ct = mrt->maxvif-1; ct >= 0; ct--) { 1620 if (mrt->vif_table[ct].dev == dev) 1621 break; 1622 } 1623 return ct; 1624 } 1625 1626 /* "local" means that we should preserve one skb (for local delivery) */ 1627 1628 static int ip_mr_forward(struct net *net, struct mr_table *mrt, 1629 struct sk_buff *skb, struct mfc_cache *cache, 1630 int local) 1631 { 1632 int psend = -1; 1633 int vif, ct; 1634 1635 vif = cache->mfc_parent; 1636 cache->mfc_un.res.pkt++; 1637 cache->mfc_un.res.bytes += skb->len; 1638 1639 /* 1640 * Wrong interface: drop packet and (maybe) send PIM assert. 1641 */ 1642 if (mrt->vif_table[vif].dev != skb->dev) { 1643 int true_vifi; 1644 1645 if (skb_rtable(skb)->fl.iif == 0) { 1646 /* It is our own packet, looped back. 1647 Very complicated situation... 1648 1649 The best workaround until routing daemons will be 1650 fixed is not to redistribute packet, if it was 1651 send through wrong interface. It means, that 1652 multicast applications WILL NOT work for 1653 (S,G), which have default multicast route pointing 1654 to wrong oif. In any case, it is not a good 1655 idea to use multicasting applications on router. 1656 */ 1657 goto dont_forward; 1658 } 1659 1660 cache->mfc_un.res.wrong_if++; 1661 true_vifi = ipmr_find_vif(mrt, skb->dev); 1662 1663 if (true_vifi >= 0 && mrt->mroute_do_assert && 1664 /* pimsm uses asserts, when switching from RPT to SPT, 1665 so that we cannot check that packet arrived on an oif. 1666 It is bad, but otherwise we would need to move pretty 1667 large chunk of pimd to kernel. Ough... --ANK 1668 */ 1669 (mrt->mroute_do_pim || 1670 cache->mfc_un.res.ttls[true_vifi] < 255) && 1671 time_after(jiffies, 1672 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1673 cache->mfc_un.res.last_assert = jiffies; 1674 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 1675 } 1676 goto dont_forward; 1677 } 1678 1679 mrt->vif_table[vif].pkt_in++; 1680 mrt->vif_table[vif].bytes_in += skb->len; 1681 1682 /* 1683 * Forward the frame 1684 */ 1685 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1686 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1687 if (psend != -1) { 1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1689 if (skb2) 1690 ipmr_queue_xmit(net, mrt, skb2, cache, 1691 psend); 1692 } 1693 psend = ct; 1694 } 1695 } 1696 if (psend != -1) { 1697 if (local) { 1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1699 if (skb2) 1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1701 } else { 1702 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1703 return 0; 1704 } 1705 } 1706 1707 dont_forward: 1708 if (!local) 1709 kfree_skb(skb); 1710 return 0; 1711 } 1712 1713 1714 /* 1715 * Multicast packets for forwarding arrive here 1716 */ 1717 1718 int ip_mr_input(struct sk_buff *skb) 1719 { 1720 struct mfc_cache *cache; 1721 struct net *net = dev_net(skb->dev); 1722 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1723 struct mr_table *mrt; 1724 int err; 1725 1726 /* Packet is looped back after forward, it should not be 1727 forwarded second time, but still can be delivered locally. 1728 */ 1729 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1730 goto dont_forward; 1731 1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1733 if (err < 0) { 1734 kfree_skb(skb); 1735 return err; 1736 } 1737 1738 if (!local) { 1739 if (IPCB(skb)->opt.router_alert) { 1740 if (ip_call_ra_chain(skb)) 1741 return 0; 1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1743 /* IGMPv1 (and broken IGMPv2 implementations sort of 1744 Cisco IOS <= 11.2(8)) do not put router alert 1745 option to IGMP packets destined to routable 1746 groups. It is very bad, because it means 1747 that we can forward NO IGMP messages. 1748 */ 1749 read_lock(&mrt_lock); 1750 if (mrt->mroute_sk) { 1751 nf_reset(skb); 1752 raw_rcv(mrt->mroute_sk, skb); 1753 read_unlock(&mrt_lock); 1754 return 0; 1755 } 1756 read_unlock(&mrt_lock); 1757 } 1758 } 1759 1760 read_lock(&mrt_lock); 1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1762 1763 /* 1764 * No usable cache entry 1765 */ 1766 if (cache == NULL) { 1767 int vif; 1768 1769 if (local) { 1770 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1771 ip_local_deliver(skb); 1772 if (skb2 == NULL) { 1773 read_unlock(&mrt_lock); 1774 return -ENOBUFS; 1775 } 1776 skb = skb2; 1777 } 1778 1779 vif = ipmr_find_vif(mrt, skb->dev); 1780 if (vif >= 0) { 1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1782 read_unlock(&mrt_lock); 1783 1784 return err2; 1785 } 1786 read_unlock(&mrt_lock); 1787 kfree_skb(skb); 1788 return -ENODEV; 1789 } 1790 1791 ip_mr_forward(net, mrt, skb, cache, local); 1792 1793 read_unlock(&mrt_lock); 1794 1795 if (local) 1796 return ip_local_deliver(skb); 1797 1798 return 0; 1799 1800 dont_forward: 1801 if (local) 1802 return ip_local_deliver(skb); 1803 kfree_skb(skb); 1804 return 0; 1805 } 1806 1807 #ifdef CONFIG_IP_PIMSM 1808 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 1809 unsigned int pimlen) 1810 { 1811 struct net_device *reg_dev = NULL; 1812 struct iphdr *encap; 1813 1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1815 /* 1816 Check that: 1817 a. packet is really destinted to a multicast group 1818 b. packet is not a NULL-REGISTER 1819 c. packet is not truncated 1820 */ 1821 if (!ipv4_is_multicast(encap->daddr) || 1822 encap->tot_len == 0 || 1823 ntohs(encap->tot_len) + pimlen > skb->len) 1824 return 1; 1825 1826 read_lock(&mrt_lock); 1827 if (mrt->mroute_reg_vif_num >= 0) 1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 1829 if (reg_dev) 1830 dev_hold(reg_dev); 1831 read_unlock(&mrt_lock); 1832 1833 if (reg_dev == NULL) 1834 return 1; 1835 1836 skb->mac_header = skb->network_header; 1837 skb_pull(skb, (u8*)encap - skb->data); 1838 skb_reset_network_header(skb); 1839 skb->protocol = htons(ETH_P_IP); 1840 skb->ip_summed = 0; 1841 skb->pkt_type = PACKET_HOST; 1842 1843 skb_tunnel_rx(skb, reg_dev); 1844 1845 netif_rx(skb); 1846 dev_put(reg_dev); 1847 1848 return 0; 1849 } 1850 #endif 1851 1852 #ifdef CONFIG_IP_PIMSM_V1 1853 /* 1854 * Handle IGMP messages of PIMv1 1855 */ 1856 1857 int pim_rcv_v1(struct sk_buff * skb) 1858 { 1859 struct igmphdr *pim; 1860 struct net *net = dev_net(skb->dev); 1861 struct mr_table *mrt; 1862 1863 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1864 goto drop; 1865 1866 pim = igmp_hdr(skb); 1867 1868 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1869 goto drop; 1870 1871 if (!mrt->mroute_do_pim || 1872 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1873 goto drop; 1874 1875 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1876 drop: 1877 kfree_skb(skb); 1878 } 1879 return 0; 1880 } 1881 #endif 1882 1883 #ifdef CONFIG_IP_PIMSM_V2 1884 static int pim_rcv(struct sk_buff * skb) 1885 { 1886 struct pimreghdr *pim; 1887 struct net *net = dev_net(skb->dev); 1888 struct mr_table *mrt; 1889 1890 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1891 goto drop; 1892 1893 pim = (struct pimreghdr *)skb_transport_header(skb); 1894 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1895 (pim->flags&PIM_NULL_REGISTER) || 1896 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1897 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1898 goto drop; 1899 1900 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) 1901 goto drop; 1902 1903 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 1904 drop: 1905 kfree_skb(skb); 1906 } 1907 return 0; 1908 } 1909 #endif 1910 1911 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 1912 struct mfc_cache *c, struct rtmsg *rtm) 1913 { 1914 int ct; 1915 struct rtnexthop *nhp; 1916 u8 *b = skb_tail_pointer(skb); 1917 struct rtattr *mp_head; 1918 1919 /* If cache is unresolved, don't try to parse IIF and OIF */ 1920 if (c->mfc_parent >= MAXVIFS) 1921 return -ENOENT; 1922 1923 if (VIF_EXISTS(mrt, c->mfc_parent)) 1924 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); 1925 1926 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 1927 1928 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1929 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 1930 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1931 goto rtattr_failure; 1932 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1933 nhp->rtnh_flags = 0; 1934 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1935 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 1936 nhp->rtnh_len = sizeof(*nhp); 1937 } 1938 } 1939 mp_head->rta_type = RTA_MULTIPATH; 1940 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; 1941 rtm->rtm_type = RTN_MULTICAST; 1942 return 1; 1943 1944 rtattr_failure: 1945 nlmsg_trim(skb, b); 1946 return -EMSGSIZE; 1947 } 1948 1949 int ipmr_get_route(struct net *net, 1950 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1951 { 1952 int err; 1953 struct mr_table *mrt; 1954 struct mfc_cache *cache; 1955 struct rtable *rt = skb_rtable(skb); 1956 1957 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 1958 if (mrt == NULL) 1959 return -ENOENT; 1960 1961 read_lock(&mrt_lock); 1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 1963 1964 if (cache == NULL) { 1965 struct sk_buff *skb2; 1966 struct iphdr *iph; 1967 struct net_device *dev; 1968 int vif; 1969 1970 if (nowait) { 1971 read_unlock(&mrt_lock); 1972 return -EAGAIN; 1973 } 1974 1975 dev = skb->dev; 1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { 1977 read_unlock(&mrt_lock); 1978 return -ENODEV; 1979 } 1980 skb2 = skb_clone(skb, GFP_ATOMIC); 1981 if (!skb2) { 1982 read_unlock(&mrt_lock); 1983 return -ENOMEM; 1984 } 1985 1986 skb_push(skb2, sizeof(struct iphdr)); 1987 skb_reset_network_header(skb2); 1988 iph = ip_hdr(skb2); 1989 iph->ihl = sizeof(struct iphdr) >> 2; 1990 iph->saddr = rt->rt_src; 1991 iph->daddr = rt->rt_dst; 1992 iph->version = 0; 1993 err = ipmr_cache_unresolved(mrt, vif, skb2); 1994 read_unlock(&mrt_lock); 1995 return err; 1996 } 1997 1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1999 cache->mfc_flags |= MFC_NOTIFY; 2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2001 read_unlock(&mrt_lock); 2002 return err; 2003 } 2004 2005 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2006 u32 pid, u32 seq, struct mfc_cache *c) 2007 { 2008 struct nlmsghdr *nlh; 2009 struct rtmsg *rtm; 2010 2011 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2012 if (nlh == NULL) 2013 return -EMSGSIZE; 2014 2015 rtm = nlmsg_data(nlh); 2016 rtm->rtm_family = RTNL_FAMILY_IPMR; 2017 rtm->rtm_dst_len = 32; 2018 rtm->rtm_src_len = 32; 2019 rtm->rtm_tos = 0; 2020 rtm->rtm_table = mrt->id; 2021 NLA_PUT_U32(skb, RTA_TABLE, mrt->id); 2022 rtm->rtm_type = RTN_MULTICAST; 2023 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2024 rtm->rtm_protocol = RTPROT_UNSPEC; 2025 rtm->rtm_flags = 0; 2026 2027 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); 2028 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); 2029 2030 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) 2031 goto nla_put_failure; 2032 2033 return nlmsg_end(skb, nlh); 2034 2035 nla_put_failure: 2036 nlmsg_cancel(skb, nlh); 2037 return -EMSGSIZE; 2038 } 2039 2040 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2041 { 2042 struct net *net = sock_net(skb->sk); 2043 struct mr_table *mrt; 2044 struct mfc_cache *mfc; 2045 unsigned int t = 0, s_t; 2046 unsigned int h = 0, s_h; 2047 unsigned int e = 0, s_e; 2048 2049 s_t = cb->args[0]; 2050 s_h = cb->args[1]; 2051 s_e = cb->args[2]; 2052 2053 read_lock(&mrt_lock); 2054 ipmr_for_each_table(mrt, net) { 2055 if (t < s_t) 2056 goto next_table; 2057 if (t > s_t) 2058 s_h = 0; 2059 for (h = s_h; h < MFC_LINES; h++) { 2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { 2061 if (e < s_e) 2062 goto next_entry; 2063 if (ipmr_fill_mroute(mrt, skb, 2064 NETLINK_CB(cb->skb).pid, 2065 cb->nlh->nlmsg_seq, 2066 mfc) < 0) 2067 goto done; 2068 next_entry: 2069 e++; 2070 } 2071 e = s_e = 0; 2072 } 2073 s_h = 0; 2074 next_table: 2075 t++; 2076 } 2077 done: 2078 read_unlock(&mrt_lock); 2079 2080 cb->args[2] = e; 2081 cb->args[1] = h; 2082 cb->args[0] = t; 2083 2084 return skb->len; 2085 } 2086 2087 #ifdef CONFIG_PROC_FS 2088 /* 2089 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2090 */ 2091 struct ipmr_vif_iter { 2092 struct seq_net_private p; 2093 struct mr_table *mrt; 2094 int ct; 2095 }; 2096 2097 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2098 struct ipmr_vif_iter *iter, 2099 loff_t pos) 2100 { 2101 struct mr_table *mrt = iter->mrt; 2102 2103 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2104 if (!VIF_EXISTS(mrt, iter->ct)) 2105 continue; 2106 if (pos-- == 0) 2107 return &mrt->vif_table[iter->ct]; 2108 } 2109 return NULL; 2110 } 2111 2112 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2113 __acquires(mrt_lock) 2114 { 2115 struct ipmr_vif_iter *iter = seq->private; 2116 struct net *net = seq_file_net(seq); 2117 struct mr_table *mrt; 2118 2119 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2120 if (mrt == NULL) 2121 return ERR_PTR(-ENOENT); 2122 2123 iter->mrt = mrt; 2124 2125 read_lock(&mrt_lock); 2126 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2127 : SEQ_START_TOKEN; 2128 } 2129 2130 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2131 { 2132 struct ipmr_vif_iter *iter = seq->private; 2133 struct net *net = seq_file_net(seq); 2134 struct mr_table *mrt = iter->mrt; 2135 2136 ++*pos; 2137 if (v == SEQ_START_TOKEN) 2138 return ipmr_vif_seq_idx(net, iter, 0); 2139 2140 while (++iter->ct < mrt->maxvif) { 2141 if (!VIF_EXISTS(mrt, iter->ct)) 2142 continue; 2143 return &mrt->vif_table[iter->ct]; 2144 } 2145 return NULL; 2146 } 2147 2148 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 2149 __releases(mrt_lock) 2150 { 2151 read_unlock(&mrt_lock); 2152 } 2153 2154 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2155 { 2156 struct ipmr_vif_iter *iter = seq->private; 2157 struct mr_table *mrt = iter->mrt; 2158 2159 if (v == SEQ_START_TOKEN) { 2160 seq_puts(seq, 2161 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 2162 } else { 2163 const struct vif_device *vif = v; 2164 const char *name = vif->dev ? vif->dev->name : "none"; 2165 2166 seq_printf(seq, 2167 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2168 vif - mrt->vif_table, 2169 name, vif->bytes_in, vif->pkt_in, 2170 vif->bytes_out, vif->pkt_out, 2171 vif->flags, vif->local, vif->remote); 2172 } 2173 return 0; 2174 } 2175 2176 static const struct seq_operations ipmr_vif_seq_ops = { 2177 .start = ipmr_vif_seq_start, 2178 .next = ipmr_vif_seq_next, 2179 .stop = ipmr_vif_seq_stop, 2180 .show = ipmr_vif_seq_show, 2181 }; 2182 2183 static int ipmr_vif_open(struct inode *inode, struct file *file) 2184 { 2185 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 2186 sizeof(struct ipmr_vif_iter)); 2187 } 2188 2189 static const struct file_operations ipmr_vif_fops = { 2190 .owner = THIS_MODULE, 2191 .open = ipmr_vif_open, 2192 .read = seq_read, 2193 .llseek = seq_lseek, 2194 .release = seq_release_net, 2195 }; 2196 2197 struct ipmr_mfc_iter { 2198 struct seq_net_private p; 2199 struct mr_table *mrt; 2200 struct list_head *cache; 2201 int ct; 2202 }; 2203 2204 2205 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2206 struct ipmr_mfc_iter *it, loff_t pos) 2207 { 2208 struct mr_table *mrt = it->mrt; 2209 struct mfc_cache *mfc; 2210 2211 read_lock(&mrt_lock); 2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { 2213 it->cache = &mrt->mfc_cache_array[it->ct]; 2214 list_for_each_entry(mfc, it->cache, list) 2215 if (pos-- == 0) 2216 return mfc; 2217 } 2218 read_unlock(&mrt_lock); 2219 2220 spin_lock_bh(&mfc_unres_lock); 2221 it->cache = &mrt->mfc_unres_queue; 2222 list_for_each_entry(mfc, it->cache, list) 2223 if (pos-- == 0) 2224 return mfc; 2225 spin_unlock_bh(&mfc_unres_lock); 2226 2227 it->cache = NULL; 2228 return NULL; 2229 } 2230 2231 2232 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 2233 { 2234 struct ipmr_mfc_iter *it = seq->private; 2235 struct net *net = seq_file_net(seq); 2236 struct mr_table *mrt; 2237 2238 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2239 if (mrt == NULL) 2240 return ERR_PTR(-ENOENT); 2241 2242 it->mrt = mrt; 2243 it->cache = NULL; 2244 it->ct = 0; 2245 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2246 : SEQ_START_TOKEN; 2247 } 2248 2249 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2250 { 2251 struct mfc_cache *mfc = v; 2252 struct ipmr_mfc_iter *it = seq->private; 2253 struct net *net = seq_file_net(seq); 2254 struct mr_table *mrt = it->mrt; 2255 2256 ++*pos; 2257 2258 if (v == SEQ_START_TOKEN) 2259 return ipmr_mfc_seq_idx(net, seq->private, 0); 2260 2261 if (mfc->list.next != it->cache) 2262 return list_entry(mfc->list.next, struct mfc_cache, list); 2263 2264 if (it->cache == &mrt->mfc_unres_queue) 2265 goto end_of_list; 2266 2267 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); 2268 2269 while (++it->ct < MFC_LINES) { 2270 it->cache = &mrt->mfc_cache_array[it->ct]; 2271 if (list_empty(it->cache)) 2272 continue; 2273 return list_first_entry(it->cache, struct mfc_cache, list); 2274 } 2275 2276 /* exhausted cache_array, show unresolved */ 2277 read_unlock(&mrt_lock); 2278 it->cache = &mrt->mfc_unres_queue; 2279 it->ct = 0; 2280 2281 spin_lock_bh(&mfc_unres_lock); 2282 if (!list_empty(it->cache)) 2283 return list_first_entry(it->cache, struct mfc_cache, list); 2284 2285 end_of_list: 2286 spin_unlock_bh(&mfc_unres_lock); 2287 it->cache = NULL; 2288 2289 return NULL; 2290 } 2291 2292 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2293 { 2294 struct ipmr_mfc_iter *it = seq->private; 2295 struct mr_table *mrt = it->mrt; 2296 2297 if (it->cache == &mrt->mfc_unres_queue) 2298 spin_unlock_bh(&mfc_unres_lock); 2299 else if (it->cache == &mrt->mfc_cache_array[it->ct]) 2300 read_unlock(&mrt_lock); 2301 } 2302 2303 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2304 { 2305 int n; 2306 2307 if (v == SEQ_START_TOKEN) { 2308 seq_puts(seq, 2309 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 2310 } else { 2311 const struct mfc_cache *mfc = v; 2312 const struct ipmr_mfc_iter *it = seq->private; 2313 const struct mr_table *mrt = it->mrt; 2314 2315 seq_printf(seq, "%08X %08X %-3hd", 2316 (__force u32) mfc->mfc_mcastgrp, 2317 (__force u32) mfc->mfc_origin, 2318 mfc->mfc_parent); 2319 2320 if (it->cache != &mrt->mfc_unres_queue) { 2321 seq_printf(seq, " %8lu %8lu %8lu", 2322 mfc->mfc_un.res.pkt, 2323 mfc->mfc_un.res.bytes, 2324 mfc->mfc_un.res.wrong_if); 2325 for (n = mfc->mfc_un.res.minvif; 2326 n < mfc->mfc_un.res.maxvif; n++ ) { 2327 if (VIF_EXISTS(mrt, n) && 2328 mfc->mfc_un.res.ttls[n] < 255) 2329 seq_printf(seq, 2330 " %2d:%-3d", 2331 n, mfc->mfc_un.res.ttls[n]); 2332 } 2333 } else { 2334 /* unresolved mfc_caches don't contain 2335 * pkt, bytes and wrong_if values 2336 */ 2337 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 2338 } 2339 seq_putc(seq, '\n'); 2340 } 2341 return 0; 2342 } 2343 2344 static const struct seq_operations ipmr_mfc_seq_ops = { 2345 .start = ipmr_mfc_seq_start, 2346 .next = ipmr_mfc_seq_next, 2347 .stop = ipmr_mfc_seq_stop, 2348 .show = ipmr_mfc_seq_show, 2349 }; 2350 2351 static int ipmr_mfc_open(struct inode *inode, struct file *file) 2352 { 2353 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 2354 sizeof(struct ipmr_mfc_iter)); 2355 } 2356 2357 static const struct file_operations ipmr_mfc_fops = { 2358 .owner = THIS_MODULE, 2359 .open = ipmr_mfc_open, 2360 .read = seq_read, 2361 .llseek = seq_lseek, 2362 .release = seq_release_net, 2363 }; 2364 #endif 2365 2366 #ifdef CONFIG_IP_PIMSM_V2 2367 static const struct net_protocol pim_protocol = { 2368 .handler = pim_rcv, 2369 .netns_ok = 1, 2370 }; 2371 #endif 2372 2373 2374 /* 2375 * Setup for IP multicast routing 2376 */ 2377 static int __net_init ipmr_net_init(struct net *net) 2378 { 2379 int err; 2380 2381 err = ipmr_rules_init(net); 2382 if (err < 0) 2383 goto fail; 2384 2385 #ifdef CONFIG_PROC_FS 2386 err = -ENOMEM; 2387 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) 2388 goto proc_vif_fail; 2389 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) 2390 goto proc_cache_fail; 2391 #endif 2392 return 0; 2393 2394 #ifdef CONFIG_PROC_FS 2395 proc_cache_fail: 2396 proc_net_remove(net, "ip_mr_vif"); 2397 proc_vif_fail: 2398 ipmr_rules_exit(net); 2399 #endif 2400 fail: 2401 return err; 2402 } 2403 2404 static void __net_exit ipmr_net_exit(struct net *net) 2405 { 2406 #ifdef CONFIG_PROC_FS 2407 proc_net_remove(net, "ip_mr_cache"); 2408 proc_net_remove(net, "ip_mr_vif"); 2409 #endif 2410 ipmr_rules_exit(net); 2411 } 2412 2413 static struct pernet_operations ipmr_net_ops = { 2414 .init = ipmr_net_init, 2415 .exit = ipmr_net_exit, 2416 }; 2417 2418 int __init ip_mr_init(void) 2419 { 2420 int err; 2421 2422 mrt_cachep = kmem_cache_create("ip_mrt_cache", 2423 sizeof(struct mfc_cache), 2424 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2425 NULL); 2426 if (!mrt_cachep) 2427 return -ENOMEM; 2428 2429 err = register_pernet_subsys(&ipmr_net_ops); 2430 if (err) 2431 goto reg_pernet_fail; 2432 2433 err = register_netdevice_notifier(&ip_mr_notifier); 2434 if (err) 2435 goto reg_notif_fail; 2436 #ifdef CONFIG_IP_PIMSM_V2 2437 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 2438 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n"); 2439 err = -EAGAIN; 2440 goto add_proto_fail; 2441 } 2442 #endif 2443 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); 2444 return 0; 2445 2446 #ifdef CONFIG_IP_PIMSM_V2 2447 add_proto_fail: 2448 unregister_netdevice_notifier(&ip_mr_notifier); 2449 #endif 2450 reg_notif_fail: 2451 unregister_pernet_subsys(&ipmr_net_ops); 2452 reg_pernet_fail: 2453 kmem_cache_destroy(mrt_cachep); 2454 return err; 2455 } 2456