/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;	/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected with the
   weak lock mrt_lock. The queue of unresolved entries is protected
   with the strong spinlock mfc_unres_lock.

   In this case the data path is free of exclusive locks at all.
 */
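
/*
 * Readers on the data path therefore only need the read side of
 * mrt_lock around a cache lookup, roughly
 *
 *	read_lock(&mrt_lock);
 *	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 *	...
 *	read_unlock(&mrt_lock);
 *
 * (see ip_mr_input() below), while writers such as vif_add() and
 * ipmr_mfc_add() run in process context under the RTNL and take
 * write_lock_bh(&mrt_lock) only around the pointer updates themselves.
 */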

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
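
/*
 * Note on the ttls[] array filled in above: ttls[n] is the forwarding
 * threshold for vif n.  A packet is forwarded on that vif only if its
 * TTL is strictly greater than the threshold (see the comparison in
 * ip_mr_forward() below); 255 means "never forward on this vif", and
 * minvif/maxvif merely bound the range that has to be scanned.
 */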

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
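
/*
 * Seen from mrouted, the upcall queued above is the original IP header
 * (with the protocol byte cleared) reinterpreted as a struct igmpmsg -
 * im_msgtype, im_vif, im_src and im_dst overlay fields of that header -
 * followed by a struct igmphdr whose type repeats the assert code.
 * IGMPMSG_WHOLEPKT upcalls from the register vif carry the complete
 * packet behind the same pseudo header instead.
 */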

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
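
/*
 * For orientation, a userspace daemon drives this interface roughly as
 * follows (illustrative sketch only; error handling omitted, and
 * "local_addr" stands for whatever address the daemon binds the vif to):
 *
 *	int one = 1;
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1,
 *			     .vifc_lcl_addr.s_addr = local_addr };
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	... read IGMPMSG_NOCACHE upcalls from s, answer with MRT_ADD_MFC ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */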

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (net->ipv4.mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
1183 */ 1184 1185 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1186 { 1187 struct iphdr *iph; 1188 struct iphdr *old_iph = ip_hdr(skb); 1189 1190 skb_push(skb, sizeof(struct iphdr)); 1191 skb->transport_header = skb->network_header; 1192 skb_reset_network_header(skb); 1193 iph = ip_hdr(skb); 1194 1195 iph->version = 4; 1196 iph->tos = old_iph->tos; 1197 iph->ttl = old_iph->ttl; 1198 iph->frag_off = 0; 1199 iph->daddr = daddr; 1200 iph->saddr = saddr; 1201 iph->protocol = IPPROTO_IPIP; 1202 iph->ihl = 5; 1203 iph->tot_len = htons(skb->len); 1204 ip_select_ident(iph, skb->dst, NULL); 1205 ip_send_check(iph); 1206 1207 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1208 nf_reset(skb); 1209 } 1210 1211 static inline int ipmr_forward_finish(struct sk_buff *skb) 1212 { 1213 struct ip_options * opt = &(IPCB(skb)->opt); 1214 1215 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1216 1217 if (unlikely(opt->optlen)) 1218 ip_forward_options(skb); 1219 1220 return dst_output(skb); 1221 } 1222 1223 /* 1224 * Processing handlers for ipmr_forward 1225 */ 1226 1227 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1228 { 1229 struct net *net = mfc_net(c); 1230 const struct iphdr *iph = ip_hdr(skb); 1231 struct vif_device *vif = &net->ipv4.vif_table[vifi]; 1232 struct net_device *dev; 1233 struct rtable *rt; 1234 int encap = 0; 1235 1236 if (vif->dev == NULL) 1237 goto out_free; 1238 1239 #ifdef CONFIG_IP_PIMSM 1240 if (vif->flags & VIFF_REGISTER) { 1241 vif->pkt_out++; 1242 vif->bytes_out += skb->len; 1243 vif->dev->stats.tx_bytes += skb->len; 1244 vif->dev->stats.tx_packets++; 1245 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); 1246 goto out_free; 1247 } 1248 #endif 1249 1250 if (vif->flags&VIFF_TUNNEL) { 1251 struct flowi fl = { .oif = vif->link, 1252 .nl_u = { .ip4_u = 1253 { .daddr = vif->remote, 1254 .saddr = vif->local, 1255 .tos = RT_TOS(iph->tos) } }, 1256 .proto = IPPROTO_IPIP }; 1257 if (ip_route_output_key(net, &rt, &fl)) 1258 goto out_free; 1259 encap = sizeof(struct iphdr); 1260 } else { 1261 struct flowi fl = { .oif = vif->link, 1262 .nl_u = { .ip4_u = 1263 { .daddr = iph->daddr, 1264 .tos = RT_TOS(iph->tos) } }, 1265 .proto = IPPROTO_IPIP }; 1266 if (ip_route_output_key(net, &rt, &fl)) 1267 goto out_free; 1268 } 1269 1270 dev = rt->u.dst.dev; 1271 1272 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1273 /* Do not fragment multicasts. Alas, IPv4 does not 1274 allow to send ICMP, so that packets will disappear 1275 to blackhole. 1276 */ 1277 1278 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1279 ip_rt_put(rt); 1280 goto out_free; 1281 } 1282 1283 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1284 1285 if (skb_cow(skb, encap)) { 1286 ip_rt_put(rt); 1287 goto out_free; 1288 } 1289 1290 vif->pkt_out++; 1291 vif->bytes_out += skb->len; 1292 1293 dst_release(skb->dst); 1294 skb->dst = &rt->u.dst; 1295 ip_decrease_ttl(ip_hdr(skb)); 1296 1297 /* FIXME: forward and output firewalls used to be called here. 1298 * What do we do with netfilter? -- RR */ 1299 if (vif->flags & VIFF_TUNNEL) { 1300 ip_encap(skb, vif->local, vif->remote); 1301 /* FIXME: extra output firewall step used to be here. 
		   --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting program,
	 * that program should receive packets regardless of which interface
	 * it has joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for (S,G)
			   entries whose default multicast route points to
			   the wrong oif. In any case, it is not a good
			   idea to use multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough...
		       --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
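
/*
 * PIM register handling: __pim_rcv() below checks that a REGISTER
 * message really wraps a multicast data packet, strips the outer IP
 * and PIM headers and feeds the inner packet back through netif_rx()
 * on the "pimreg" device, so that it re-enters ip_mr_input() above as
 * if it had been received on the register vif.
 */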

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
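
/*
 * The reply built by ipmr_fill_mroute() above thus carries an RTA_IIF
 * attribute naming the parent interface plus a single RTA_MULTIPATH
 * attribute whose nested rtnexthop entries list every outgoing
 * interface, with rtnh_hops (ab)used to report the vif's TTL threshold.
 */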

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb->rtable;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ?
				   vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ?
		ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
	.handler = pim_rcv,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
	return 0;

reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}