/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

struct sock *mroute6_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];	/* Devices		*/
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;			/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;	/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;	/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;


#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
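/*
 * Illustrative sketch (values are hypothetical, not from the original
 * source): given one physical mif and a register mif, the seq_show
 * routine above renders /proc/net/ip6_mr_vif roughly as:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      62      1280      10 00000
 *	 1 pim6reg           0       0       948       7 00001
 *
 * Flags is the mif flags word in hex; the register vif created for
 * PIM-SM carries MIFF_REGISTER.
 */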
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq,
			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
			   mfc->mf6c_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
#endif
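/*
 * Illustrative sketch (addresses and counters are hypothetical): an
 * (S,G) entry forwarding from mif 0 onto mifs 1 and 2 would show up in
 * /proc/net/ip6_mr_cache roughly as (NIP6_FMT prints full-form
 * addresses):
 *
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff1e:...:0001 2001:0db8:...:0001 0         12     1344        0  1:1   2:1
 *
 * Each "mif:ttl" pair lists an outgoing interface with its threshold;
 * unresolved entries are printed without the Oifs column.
 */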
#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* the decapsulated packet is IPv6 */
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
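/*
 * Illustrative sketch (not kernel code; names are assumptions): a
 * userspace PIM daemon would typically ask for the register vif above
 * by issuing MRT6_ADD_MIF with MIFF_REGISTER on its mroute socket,
 * roughly:
 *
 *	struct mif6ctl mc;
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mif6c_mifi  = 0;              // mif index chosen by the daemon
 *	mc.mif6c_flags = MIFF_REGISTER;  // kernel then creates "pim6reg"
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 * mrt_sock here stands for the raw ICMPv6 socket on which MRT6_INIT
 * was issued (see ip6_mroute_setsockopt below).
 */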
/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oif list. Called with mrt_lock held for writing. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
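/*
 * Worked example (illustrative, assuming mifs 2 and 5 exist): with
 * ttls = { [2] = 1, [5] = 1, all others 255 }, the function above
 * leaves minvif == 2 and maxvif == 6, so the forwarding loop in
 * ip6_mr_forward() scans mifs 2..5 and duplicates a packet onto mifs
 * 2 and 5 whenever its hop limit exceeds 1. A ttl of 255 marks a mif
 * as excluded from the oif set.
 */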
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */
	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->dst = dst_clone(pkt->dst);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
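/*
 * Illustrative sketch (not kernel code): on the userspace side,
 * pim6sd picks these upcalls off its raw ICMPv6 socket with read().
 * An mrt6msg is distinguished from real ICMPv6 traffic because
 * im6_mbz overlays the ICMPv6 type field, which is never zero for a
 * genuine ICMPv6 message. Roughly:
 *
 *	char buf[8192];
 *	ssize_t n = read(mrt_sock, buf, sizeof(buf));
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *	if (n >= (ssize_t)sizeof(*m) && m->im6_mbz == 0) {
 *		switch (m->im6_msgtype) {
 *		case MRT6MSG_NOCACHE:  // resolve (im6_src, im6_dst), then MRT6_ADD_MFC
 *		case MRT6MSG_WRONGMIF: // consider sending a PIM assert
 *		case MRT6MSG_WHOLEPKT: // PIM register processing
 *			break;
 *		}
 *	}
 */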
/*
 *	Queue a packet for resolution, attaching it to an unresolved
 *	cache entry (created here if necessary).
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
/*
 *	Setup for IP multicast routing
 */

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
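/*
 * Illustrative sketch (not kernel code; source_addr and group_addr are
 * hypothetical): userspace installs an (S,G) entry by filling a
 * struct mf6cctl and marking outgoing mifs with IF_SET, the setter
 * counterpart of the IF_ISSET test above. E.g. to forward from mif 0
 * onto mifs 1 and 2:
 *
 *	struct mf6cctl mf;
 *	memset(&mf, 0, sizeof(mf));
 *	mf.mf6cc_origin.sin6_addr   = source_addr;   // S
 *	mf.mf6cc_mcastgrp.sin6_addr = group_addr;    // G
 *	mf.mf6cc_parent = 0;                         // incoming mif
 *	IF_SET(1, &mf.mf6cc_ifset);
 *	IF_SET(2, &mf.mf6cc_ifset);
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_ADD_MFC, &mf, sizeof(mf));
 *
 * ip6mr_mfc_add() then turns each set bit into a ttl of 1, as in the
 * IF_ISSET loop above.
 */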
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mroute6_socket == NULL))
		mroute6_socket = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == mroute6_socket) {
		write_lock_bh(&mrt_lock);
		mroute6_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
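/*
 * Illustrative sketch (not kernel code; "eth0" is a hypothetical
 * device): the expected userspace sequence against this API, on a raw
 * ICMPv6 socket at the IPPROTO_IPV6 option level:
 *
 *	int mrt_sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *
 *	// become the (single) multicast routing daemon
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	// bind a physical interface to mif 0
 *	struct mif6ctl mc;
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mif6c_mifi = 0;
 *	mc.mif6c_pifi = if_nametoindex("eth0");
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 *	// ... MRT6_ADD_MFC / MRT6_DEL_MFC as groups come and go ...
 *	setsockopt(mrt_sock, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 *
 * MRT6_INIT must come first; every other option returns -EACCES unless
 * issued on the registered socket or with CAP_NET_ADMIN.
 */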
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (enabling PIM also enables asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	getsockopt() support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
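/*
 * Illustrative sketch (not kernel code; source_addr and group_addr are
 * hypothetical): per-(S,G) counters are queried with
 * SIOCGETSGCNT_IN6 on the mroute socket, roughly:
 *
 *	struct sioc_sg_req6 sr;
 *	memset(&sr, 0, sizeof(sr));
 *	sr.src.sin6_addr = source_addr;
 *	sr.grp.sin6_addr = group_addr;
 *	if (ioctl(mrt_sock, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 *
 * SIOCGETMIFCNT_IN6 works the same way with a struct sioc_mif_req6
 * keyed by its mifi field.
 */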
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but after forwarding on all
	 * output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it joined on.
	 * If we do not arrange this, the program will have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies of each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}