/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

/* One IPv6 multicast routing table.  Without
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES there is exactly one per netns
 * (net->ipv6.mrt6); with it, tables are kept on net->ipv6.mr6_tables
 * and selected through fib rules.
 */
struct mr6_table {
	struct list_head	list;		/* link on net->ipv6.mr6_tables */
	possible_net_t		net;		/* owning netns */
	u32			id;		/* table id (RT6_TABLE_*) */
	struct sock		*mroute6_sk;	/* userspace daemon's socket, if any */
	struct timer_list	ipmr_expire_timer; /* ages mfc6_unres_queue */
	struct list_head	mfc6_unres_queue;  /* unresolved (S,G) entries */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved, hashed */
	struct vif_device	vif6_table[MAXMIFS]; /* multicast interfaces */
	int			maxvif;		/* highest in-use vif index + 1 */
	atomic_t		cache_resolve_queue_len; /* # entries queued unresolved */
	bool			mroute_do_assert; /* report wrong-iif to daemon */
	bool			mroute_do_pim;	/* PIM mode enabled */
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* index of PIM register vif, -1 if none */
#endif
};

/* fib rule wrapper used to dispatch lookups to a mr6_table */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* result slot filled in by ip6mr_rule_action() */
struct ip6mr_result {
	struct mr6_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Find the table with the given id, or NULL.  Walks the RCU list; callers
 * hold RTNL or RCU as appropriate.
 */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

/* Resolve the table for a flow via the ip6mr fib rules.
 * Returns 0 and sets *mrt, or a negative errno from the rule action.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

/* fib_rules ->action: map a matched rule to its mr6_table. */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* fib_rules ->match: ip6mr rules have no selectors beyond the generic ones. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

/* Per-netns init: register the rules ops, create the default table and
 * its catch-all rule.  On failure everything created so far is undone.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

/* Per-netns teardown: free every table, then drop the rules ops.
 * Runs under RTNL because table/vif teardown expects it.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

/* Create (or return the existing) table with the given id. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

/* Stop the expiry timer, flush all cache entries and vifs, free the table. */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS

/* Iterator state for /proc/net/ip6_mr_cache */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;	/* current hash chain or unres queue */
	int ct;				/* current hash line */
};


/* Position the mfc iterator at entry 'pos', first over the resolved hash
 * chains (leaves mrt_lock read-held on success), then over the unresolved
 * queue (leaves mfc_unres_lock held on success).  The matching unlock
 * happens in ipmr_mfc_seq_stop() based on it->cache.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
376 377 /* 378 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif 379 */ 380 381 struct ipmr_vif_iter { 382 struct seq_net_private p; 383 struct mr6_table *mrt; 384 int ct; 385 }; 386 387 static struct vif_device *ip6mr_vif_seq_idx(struct net *net, 388 struct ipmr_vif_iter *iter, 389 loff_t pos) 390 { 391 struct mr6_table *mrt = iter->mrt; 392 393 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 394 if (!MIF_EXISTS(mrt, iter->ct)) 395 continue; 396 if (pos-- == 0) 397 return &mrt->vif6_table[iter->ct]; 398 } 399 return NULL; 400 } 401 402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 403 __acquires(mrt_lock) 404 { 405 struct ipmr_vif_iter *iter = seq->private; 406 struct net *net = seq_file_net(seq); 407 struct mr6_table *mrt; 408 409 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 410 if (!mrt) 411 return ERR_PTR(-ENOENT); 412 413 iter->mrt = mrt; 414 415 read_lock(&mrt_lock); 416 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) 417 : SEQ_START_TOKEN; 418 } 419 420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 421 { 422 struct ipmr_vif_iter *iter = seq->private; 423 struct net *net = seq_file_net(seq); 424 struct mr6_table *mrt = iter->mrt; 425 426 ++*pos; 427 if (v == SEQ_START_TOKEN) 428 return ip6mr_vif_seq_idx(net, iter, 0); 429 430 while (++iter->ct < mrt->maxvif) { 431 if (!MIF_EXISTS(mrt, iter->ct)) 432 continue; 433 return &mrt->vif6_table[iter->ct]; 434 } 435 return NULL; 436 } 437 438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 439 __releases(mrt_lock) 440 { 441 read_unlock(&mrt_lock); 442 } 443 444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 445 { 446 struct ipmr_vif_iter *iter = seq->private; 447 struct mr6_table *mrt = iter->mrt; 448 449 if (v == SEQ_START_TOKEN) { 450 seq_puts(seq, 451 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 452 } else { 453 const struct vif_device *vif = v; 454 const char *name = vif->dev 
? vif->dev->name : "none"; 455 456 seq_printf(seq, 457 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 458 vif - mrt->vif6_table, 459 name, vif->bytes_in, vif->pkt_in, 460 vif->bytes_out, vif->pkt_out, 461 vif->flags); 462 } 463 return 0; 464 } 465 466 static const struct seq_operations ip6mr_vif_seq_ops = { 467 .start = ip6mr_vif_seq_start, 468 .next = ip6mr_vif_seq_next, 469 .stop = ip6mr_vif_seq_stop, 470 .show = ip6mr_vif_seq_show, 471 }; 472 473 static int ip6mr_vif_open(struct inode *inode, struct file *file) 474 { 475 return seq_open_net(inode, file, &ip6mr_vif_seq_ops, 476 sizeof(struct ipmr_vif_iter)); 477 } 478 479 static const struct file_operations ip6mr_vif_fops = { 480 .open = ip6mr_vif_open, 481 .read = seq_read, 482 .llseek = seq_lseek, 483 .release = seq_release_net, 484 }; 485 486 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 487 { 488 struct ipmr_mfc_iter *it = seq->private; 489 struct net *net = seq_file_net(seq); 490 struct mr6_table *mrt; 491 492 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 493 if (!mrt) 494 return ERR_PTR(-ENOENT); 495 496 it->mrt = mrt; 497 it->cache = NULL; 498 return *pos ? 
ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 499 : SEQ_START_TOKEN; 500 } 501 502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 503 { 504 struct mfc6_cache *mfc = v; 505 struct ipmr_mfc_iter *it = seq->private; 506 struct net *net = seq_file_net(seq); 507 struct mr6_table *mrt = it->mrt; 508 509 ++*pos; 510 511 if (v == SEQ_START_TOKEN) 512 return ipmr_mfc_seq_idx(net, seq->private, 0); 513 514 if (mfc->list.next != it->cache) 515 return list_entry(mfc->list.next, struct mfc6_cache, list); 516 517 if (it->cache == &mrt->mfc6_unres_queue) 518 goto end_of_list; 519 520 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); 521 522 while (++it->ct < MFC6_LINES) { 523 it->cache = &mrt->mfc6_cache_array[it->ct]; 524 if (list_empty(it->cache)) 525 continue; 526 return list_first_entry(it->cache, struct mfc6_cache, list); 527 } 528 529 /* exhausted cache_array, show unresolved */ 530 read_unlock(&mrt_lock); 531 it->cache = &mrt->mfc6_unres_queue; 532 it->ct = 0; 533 534 spin_lock_bh(&mfc_unres_lock); 535 if (!list_empty(it->cache)) 536 return list_first_entry(it->cache, struct mfc6_cache, list); 537 538 end_of_list: 539 spin_unlock_bh(&mfc_unres_lock); 540 it->cache = NULL; 541 542 return NULL; 543 } 544 545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 546 { 547 struct ipmr_mfc_iter *it = seq->private; 548 struct mr6_table *mrt = it->mrt; 549 550 if (it->cache == &mrt->mfc6_unres_queue) 551 spin_unlock_bh(&mfc_unres_lock); 552 else if (it->cache == &mrt->mfc6_cache_array[it->ct]) 553 read_unlock(&mrt_lock); 554 } 555 556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 557 { 558 int n; 559 560 if (v == SEQ_START_TOKEN) { 561 seq_puts(seq, 562 "Group " 563 "Origin " 564 "Iif Pkts Bytes Wrong Oifs\n"); 565 } else { 566 const struct mfc6_cache *mfc = v; 567 const struct ipmr_mfc_iter *it = seq->private; 568 struct mr6_table *mrt = it->mrt; 569 570 seq_printf(seq, "%pI6 %pI6 %-3hd", 571 &mfc->mf6c_mcastgrp, 
&mfc->mf6c_origin, 572 mfc->mf6c_parent); 573 574 if (it->cache != &mrt->mfc6_unres_queue) { 575 seq_printf(seq, " %8lu %8lu %8lu", 576 mfc->mfc_un.res.pkt, 577 mfc->mfc_un.res.bytes, 578 mfc->mfc_un.res.wrong_if); 579 for (n = mfc->mfc_un.res.minvif; 580 n < mfc->mfc_un.res.maxvif; n++) { 581 if (MIF_EXISTS(mrt, n) && 582 mfc->mfc_un.res.ttls[n] < 255) 583 seq_printf(seq, 584 " %2d:%-3d", 585 n, mfc->mfc_un.res.ttls[n]); 586 } 587 } else { 588 /* unresolved mfc_caches don't contain 589 * pkt, bytes and wrong_if values 590 */ 591 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 592 } 593 seq_putc(seq, '\n'); 594 } 595 return 0; 596 } 597 598 static const struct seq_operations ipmr_mfc_seq_ops = { 599 .start = ipmr_mfc_seq_start, 600 .next = ipmr_mfc_seq_next, 601 .stop = ipmr_mfc_seq_stop, 602 .show = ipmr_mfc_seq_show, 603 }; 604 605 static int ipmr_mfc_open(struct inode *inode, struct file *file) 606 { 607 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 608 sizeof(struct ipmr_mfc_iter)); 609 } 610 611 static const struct file_operations ip6mr_mfc_fops = { 612 .open = ipmr_mfc_open, 613 .read = seq_read, 614 .llseek = seq_lseek, 615 .release = seq_release_net, 616 }; 617 #endif 618 619 #ifdef CONFIG_IPV6_PIMSM_V2 620 621 static int pim6_rcv(struct sk_buff *skb) 622 { 623 struct pimreghdr *pim; 624 struct ipv6hdr *encap; 625 struct net_device *reg_dev = NULL; 626 struct net *net = dev_net(skb->dev); 627 struct mr6_table *mrt; 628 struct flowi6 fl6 = { 629 .flowi6_iif = skb->dev->ifindex, 630 .flowi6_mark = skb->mark, 631 }; 632 int reg_vif_num; 633 634 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 635 goto drop; 636 637 pim = (struct pimreghdr *)skb_transport_header(skb); 638 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 639 (pim->flags & PIM_NULL_REGISTER) || 640 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 641 sizeof(*pim), IPPROTO_PIM, 642 csum_partial((void *)pim, sizeof(*pim), 0)) && 643 
csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 644 goto drop; 645 646 /* check if the inner packet is destined to mcast group */ 647 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 648 sizeof(*pim)); 649 650 if (!ipv6_addr_is_multicast(&encap->daddr) || 651 encap->payload_len == 0 || 652 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 653 goto drop; 654 655 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 656 goto drop; 657 reg_vif_num = mrt->mroute_reg_vif_num; 658 659 read_lock(&mrt_lock); 660 if (reg_vif_num >= 0) 661 reg_dev = mrt->vif6_table[reg_vif_num].dev; 662 if (reg_dev) 663 dev_hold(reg_dev); 664 read_unlock(&mrt_lock); 665 666 if (!reg_dev) 667 goto drop; 668 669 skb->mac_header = skb->network_header; 670 skb_pull(skb, (u8 *)encap - skb->data); 671 skb_reset_network_header(skb); 672 skb->protocol = htons(ETH_P_IPV6); 673 skb->ip_summed = CHECKSUM_NONE; 674 675 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 676 677 netif_rx(skb); 678 679 dev_put(reg_dev); 680 return 0; 681 drop: 682 kfree_skb(skb); 683 return 0; 684 } 685 686 static const struct inet6_protocol pim6_protocol = { 687 .handler = pim6_rcv, 688 }; 689 690 /* Service routines creating virtual interfaces: PIMREG */ 691 692 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 693 struct net_device *dev) 694 { 695 struct net *net = dev_net(dev); 696 struct mr6_table *mrt; 697 struct flowi6 fl6 = { 698 .flowi6_oif = dev->ifindex, 699 .flowi6_iif = skb->skb_iif ? 
: LOOPBACK_IFINDEX, 700 .flowi6_mark = skb->mark, 701 }; 702 int err; 703 704 err = ip6mr_fib_lookup(net, &fl6, &mrt); 705 if (err < 0) { 706 kfree_skb(skb); 707 return err; 708 } 709 710 read_lock(&mrt_lock); 711 dev->stats.tx_bytes += skb->len; 712 dev->stats.tx_packets++; 713 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); 714 read_unlock(&mrt_lock); 715 kfree_skb(skb); 716 return NETDEV_TX_OK; 717 } 718 719 static int reg_vif_get_iflink(const struct net_device *dev) 720 { 721 return 0; 722 } 723 724 static const struct net_device_ops reg_vif_netdev_ops = { 725 .ndo_start_xmit = reg_vif_xmit, 726 .ndo_get_iflink = reg_vif_get_iflink, 727 }; 728 729 static void reg_vif_setup(struct net_device *dev) 730 { 731 dev->type = ARPHRD_PIMREG; 732 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 733 dev->flags = IFF_NOARP; 734 dev->netdev_ops = ®_vif_netdev_ops; 735 dev->needs_free_netdev = true; 736 dev->features |= NETIF_F_NETNS_LOCAL; 737 } 738 739 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) 740 { 741 struct net_device *dev; 742 char name[IFNAMSIZ]; 743 744 if (mrt->id == RT6_TABLE_DFLT) 745 sprintf(name, "pim6reg"); 746 else 747 sprintf(name, "pim6reg%u", mrt->id); 748 749 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 750 if (!dev) 751 return NULL; 752 753 dev_net_set(dev, net); 754 755 if (register_netdevice(dev)) { 756 free_netdev(dev); 757 return NULL; 758 } 759 760 if (dev_open(dev)) 761 goto failure; 762 763 dev_hold(dev); 764 return dev; 765 766 failure: 767 unregister_netdevice(dev); 768 return NULL; 769 } 770 #endif 771 772 /* 773 * Delete a VIF entry 774 */ 775 776 static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, 777 struct list_head *head) 778 { 779 struct vif_device *v; 780 struct net_device *dev; 781 struct inet6_dev *in6_dev; 782 783 if (vifi < 0 || vifi >= mrt->maxvif) 784 return -EADDRNOTAVAIL; 785 786 v = &mrt->vif6_table[vifi]; 787 788 
write_lock_bh(&mrt_lock); 789 dev = v->dev; 790 v->dev = NULL; 791 792 if (!dev) { 793 write_unlock_bh(&mrt_lock); 794 return -EADDRNOTAVAIL; 795 } 796 797 #ifdef CONFIG_IPV6_PIMSM_V2 798 if (vifi == mrt->mroute_reg_vif_num) 799 mrt->mroute_reg_vif_num = -1; 800 #endif 801 802 if (vifi + 1 == mrt->maxvif) { 803 int tmp; 804 for (tmp = vifi - 1; tmp >= 0; tmp--) { 805 if (MIF_EXISTS(mrt, tmp)) 806 break; 807 } 808 mrt->maxvif = tmp + 1; 809 } 810 811 write_unlock_bh(&mrt_lock); 812 813 dev_set_allmulti(dev, -1); 814 815 in6_dev = __in6_dev_get(dev); 816 if (in6_dev) { 817 in6_dev->cnf.mc_forwarding--; 818 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 819 NETCONFA_MC_FORWARDING, 820 dev->ifindex, &in6_dev->cnf); 821 } 822 823 if ((v->flags & MIFF_REGISTER) && !notify) 824 unregister_netdevice_queue(dev, head); 825 826 dev_put(dev); 827 return 0; 828 } 829 830 static inline void ip6mr_cache_free(struct mfc6_cache *c) 831 { 832 kmem_cache_free(mrt_cachep, c); 833 } 834 835 /* Destroy an unresolved cache entry, killing queued skbs 836 and reporting error to netlink readers. 837 */ 838 839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) 840 { 841 struct net *net = read_pnet(&mrt->net); 842 struct sk_buff *skb; 843 844 atomic_dec(&mrt->cache_resolve_queue_len); 845 846 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { 847 if (ipv6_hdr(skb)->version == 0) { 848 struct nlmsghdr *nlh = skb_pull(skb, 849 sizeof(struct ipv6hdr)); 850 nlh->nlmsg_type = NLMSG_ERROR; 851 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 852 skb_trim(skb, nlh->nlmsg_len); 853 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 854 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 855 } else 856 kfree_skb(skb); 857 } 858 859 ip6mr_cache_free(c); 860 } 861 862 863 /* Timer process for all the unresolved queue. 
*/ 864 865 static void ipmr_do_expire_process(struct mr6_table *mrt) 866 { 867 unsigned long now = jiffies; 868 unsigned long expires = 10 * HZ; 869 struct mfc6_cache *c, *next; 870 871 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { 872 if (time_after(c->mfc_un.unres.expires, now)) { 873 /* not yet... */ 874 unsigned long interval = c->mfc_un.unres.expires - now; 875 if (interval < expires) 876 expires = interval; 877 continue; 878 } 879 880 list_del(&c->list); 881 mr6_netlink_event(mrt, c, RTM_DELROUTE); 882 ip6mr_destroy_unres(mrt, c); 883 } 884 885 if (!list_empty(&mrt->mfc6_unres_queue)) 886 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 887 } 888 889 static void ipmr_expire_process(struct timer_list *t) 890 { 891 struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 892 893 if (!spin_trylock(&mfc_unres_lock)) { 894 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 895 return; 896 } 897 898 if (!list_empty(&mrt->mfc6_unres_queue)) 899 ipmr_do_expire_process(mrt); 900 901 spin_unlock(&mfc_unres_lock); 902 } 903 904 /* Fill oifs list. It is called under write locked mrt_lock. 
*/ 905 906 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, 907 unsigned char *ttls) 908 { 909 int vifi; 910 911 cache->mfc_un.res.minvif = MAXMIFS; 912 cache->mfc_un.res.maxvif = 0; 913 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 914 915 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 916 if (MIF_EXISTS(mrt, vifi) && 917 ttls[vifi] && ttls[vifi] < 255) { 918 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 919 if (cache->mfc_un.res.minvif > vifi) 920 cache->mfc_un.res.minvif = vifi; 921 if (cache->mfc_un.res.maxvif <= vifi) 922 cache->mfc_un.res.maxvif = vifi + 1; 923 } 924 } 925 cache->mfc_un.res.lastuse = jiffies; 926 } 927 928 static int mif6_add(struct net *net, struct mr6_table *mrt, 929 struct mif6ctl *vifc, int mrtsock) 930 { 931 int vifi = vifc->mif6c_mifi; 932 struct vif_device *v = &mrt->vif6_table[vifi]; 933 struct net_device *dev; 934 struct inet6_dev *in6_dev; 935 int err; 936 937 /* Is vif busy ? */ 938 if (MIF_EXISTS(mrt, vifi)) 939 return -EADDRINUSE; 940 941 switch (vifc->mif6c_flags) { 942 #ifdef CONFIG_IPV6_PIMSM_V2 943 case MIFF_REGISTER: 944 /* 945 * Special Purpose VIF in PIM 946 * All the packets will be sent to the daemon 947 */ 948 if (mrt->mroute_reg_vif_num >= 0) 949 return -EADDRINUSE; 950 dev = ip6mr_reg_vif(net, mrt); 951 if (!dev) 952 return -ENOBUFS; 953 err = dev_set_allmulti(dev, 1); 954 if (err) { 955 unregister_netdevice(dev); 956 dev_put(dev); 957 return err; 958 } 959 break; 960 #endif 961 case 0: 962 dev = dev_get_by_index(net, vifc->mif6c_pifi); 963 if (!dev) 964 return -EADDRNOTAVAIL; 965 err = dev_set_allmulti(dev, 1); 966 if (err) { 967 dev_put(dev); 968 return err; 969 } 970 break; 971 default: 972 return -EINVAL; 973 } 974 975 in6_dev = __in6_dev_get(dev); 976 if (in6_dev) { 977 in6_dev->cnf.mc_forwarding++; 978 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 979 NETCONFA_MC_FORWARDING, 980 dev->ifindex, &in6_dev->cnf); 981 } 982 983 /* Fill in the VIF structures */ 984 
vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 985 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 986 MIFF_REGISTER); 987 988 /* And finish update writing critical data */ 989 write_lock_bh(&mrt_lock); 990 v->dev = dev; 991 #ifdef CONFIG_IPV6_PIMSM_V2 992 if (v->flags & MIFF_REGISTER) 993 mrt->mroute_reg_vif_num = vifi; 994 #endif 995 if (vifi + 1 > mrt->maxvif) 996 mrt->maxvif = vifi + 1; 997 write_unlock_bh(&mrt_lock); 998 return 0; 999 } 1000 1001 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, 1002 const struct in6_addr *origin, 1003 const struct in6_addr *mcastgrp) 1004 { 1005 int line = MFC6_HASH(mcastgrp, origin); 1006 struct mfc6_cache *c; 1007 1008 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { 1009 if (ipv6_addr_equal(&c->mf6c_origin, origin) && 1010 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) 1011 return c; 1012 } 1013 return NULL; 1014 } 1015 1016 /* Look for a (*,*,oif) entry */ 1017 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, 1018 mifi_t mifi) 1019 { 1020 int line = MFC6_HASH(&in6addr_any, &in6addr_any); 1021 struct mfc6_cache *c; 1022 1023 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) 1024 if (ipv6_addr_any(&c->mf6c_origin) && 1025 ipv6_addr_any(&c->mf6c_mcastgrp) && 1026 (c->mfc_un.res.ttls[mifi] < 255)) 1027 return c; 1028 1029 return NULL; 1030 } 1031 1032 /* Look for a (*,G) entry */ 1033 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, 1034 struct in6_addr *mcastgrp, 1035 mifi_t mifi) 1036 { 1037 int line = MFC6_HASH(mcastgrp, &in6addr_any); 1038 struct mfc6_cache *c, *proxy; 1039 1040 if (ipv6_addr_any(mcastgrp)) 1041 goto skip; 1042 1043 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) 1044 if (ipv6_addr_any(&c->mf6c_origin) && 1045 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { 1046 if (c->mfc_un.res.ttls[mifi] < 255) 1047 return c; 1048 1049 /* It's ok if the mifi is part of the static tree */ 1050 proxy = 
ip6mr_cache_find_any_parent(mrt, 1051 c->mf6c_parent); 1052 if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) 1053 return c; 1054 } 1055 1056 skip: 1057 return ip6mr_cache_find_any_parent(mrt, mifi); 1058 } 1059 1060 /* 1061 * Allocate a multicast cache entry 1062 */ 1063 static struct mfc6_cache *ip6mr_cache_alloc(void) 1064 { 1065 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1066 if (!c) 1067 return NULL; 1068 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1069 c->mfc_un.res.minvif = MAXMIFS; 1070 return c; 1071 } 1072 1073 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 1074 { 1075 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1076 if (!c) 1077 return NULL; 1078 skb_queue_head_init(&c->mfc_un.unres.unresolved); 1079 c->mfc_un.unres.expires = jiffies + 10 * HZ; 1080 return c; 1081 } 1082 1083 /* 1084 * A cache entry has gone into a resolved state from queued 1085 */ 1086 1087 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, 1088 struct mfc6_cache *uc, struct mfc6_cache *c) 1089 { 1090 struct sk_buff *skb; 1091 1092 /* 1093 * Play the pending entries through our router 1094 */ 1095 1096 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1097 if (ipv6_hdr(skb)->version == 0) { 1098 struct nlmsghdr *nlh = skb_pull(skb, 1099 sizeof(struct ipv6hdr)); 1100 1101 if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { 1102 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1103 } else { 1104 nlh->nlmsg_type = NLMSG_ERROR; 1105 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1106 skb_trim(skb, nlh->nlmsg_len); 1107 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1108 } 1109 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1110 } else 1111 ip6_mr_forward(net, mrt, skb, c); 1112 } 1113 } 1114 1115 /* 1116 * Bounce a cache query up to pim6sd and netlink. 1117 * 1118 * Called under mrt_lock. 
1119 */ 1120 1121 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, 1122 mifi_t mifi, int assert) 1123 { 1124 struct sk_buff *skb; 1125 struct mrt6msg *msg; 1126 int ret; 1127 1128 #ifdef CONFIG_IPV6_PIMSM_V2 1129 if (assert == MRT6MSG_WHOLEPKT) 1130 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1131 +sizeof(*msg)); 1132 else 1133 #endif 1134 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1135 1136 if (!skb) 1137 return -ENOBUFS; 1138 1139 /* I suppose that internal messages 1140 * do not require checksums */ 1141 1142 skb->ip_summed = CHECKSUM_UNNECESSARY; 1143 1144 #ifdef CONFIG_IPV6_PIMSM_V2 1145 if (assert == MRT6MSG_WHOLEPKT) { 1146 /* Ugly, but we have no choice with this interface. 1147 Duplicate old header, fix length etc. 1148 And all this only to mangle msg->im6_msgtype and 1149 to set msg->im6_mbz to "mbz" :-) 1150 */ 1151 skb_push(skb, -skb_network_offset(pkt)); 1152 1153 skb_push(skb, sizeof(*msg)); 1154 skb_reset_transport_header(skb); 1155 msg = (struct mrt6msg *)skb_transport_header(skb); 1156 msg->im6_mbz = 0; 1157 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1158 msg->im6_mif = mrt->mroute_reg_vif_num; 1159 msg->im6_pad = 0; 1160 msg->im6_src = ipv6_hdr(pkt)->saddr; 1161 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1162 1163 skb->ip_summed = CHECKSUM_UNNECESSARY; 1164 } else 1165 #endif 1166 { 1167 /* 1168 * Copy the IP header 1169 */ 1170 1171 skb_put(skb, sizeof(struct ipv6hdr)); 1172 skb_reset_network_header(skb); 1173 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1174 1175 /* 1176 * Add our header 1177 */ 1178 skb_put(skb, sizeof(*msg)); 1179 skb_reset_transport_header(skb); 1180 msg = (struct mrt6msg *)skb_transport_header(skb); 1181 1182 msg->im6_mbz = 0; 1183 msg->im6_msgtype = assert; 1184 msg->im6_mif = mifi; 1185 msg->im6_pad = 0; 1186 msg->im6_src = ipv6_hdr(pkt)->saddr; 1187 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1188 1189 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1190 
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* No pim6sd control socket bound to this table: nowhere to deliver. */
	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

/* Park @skb on the unresolved queue for its (saddr, daddr) pair.  On first
 * sight of the flow a stub entry is created and pim6sd is asked to resolve
 * it (MRT6MSG_NOCACHE).  Owns @skb: every failure path frees it.  All list
 * manipulation happens under mfc_unres_lock.
 */
static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* Hard cap of 10 pending unresolved entries per table. */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;	/* input interface not known yet */
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* (Re)arm the expiry timer for the unresolved queue. */
		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		/* At most 4 packets may wait per unresolved entry. */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

/* Delete the resolved (S,G) entry matching @mfc; @parent of -1 matches any
 * input interface.  Caller holds RTNL; mrt_lock is taken only around the
 * unlink so data-path readers never see a half-removed entry.
 */
static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

/* Netdevice notifier: on NETDEV_UNREGISTER, drop every MIF bound to the
 * disappearing device in every table of that netns.
 */
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

/* Per-netns init: routing tables/rules plus the /proc diagnostic files. */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

/* Per-netns teardown: exact mirror of ip6mr_net_init(). */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.async = true,
};

/* Boot/module init: cache slab, pernet ops, netdev notifier, the PIM
 * protocol handler (when configured) and the RTNL dump hook.  Unwinds via
 * goto labels in strict reverse order on any failure.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

/* Full teardown, reverse order of ip6_mr_init(). */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

/* Install or update the (S,G) route described by @mfc.  Called under RTNL
 * from setsockopt.  @mrtsock is true when the request comes from the mroute6
 * control socket itself; entries added by anyone else are marked MFC_STATIC
 * so they survive a pim6sd restart.
 */
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* ttl 255 means "never forward on this mif"; selected mifs get 1. */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		/* Existing resolved entry: update in place under mrt_lock. */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* A non-wildcard group address must actually be multicast. */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No pending entries left: the expiry timer has nothing to do. */
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Replay the packets that were queued on the stub entry. */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/* Tear down forwarding state in @mrt.  With @all false, entries marked
 * VIFF_STATIC / MFC_STATIC are preserved (pim6sd restart); with @all true
 * everything goes (table destruction).
 */
static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	/* Flush unresolved entries (and their queued packets) as well. */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* Bind @sk as the single mroute6 control socket of @mrt and raise the
 * netns-wide mc_forwarding flag; -EADDRINUSE if a socket is already bound.
 * Takes RTNL itself, so callers must not hold it.
 */
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	} else {
		err = -EADDRINUSE;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

/* Undo ip6mr_sk_init() for whichever table @sk controls: clear the socket,
 * drop mc_forwarding and flush all non-static state.  Returns -EACCES when
 * @sk is not an ICMPv6 raw socket or controls no table.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

/* Find the mroute6 control socket responsible for @skb by looking up the
 * multicast routing table its flow maps to; NULL if the lookup fails.
 */
struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation.
 *	The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

/* MRT6_* setsockopt handler for the mroute6 control socket.  Only an ICMPv6
 * raw socket may use it; any socket other than the bound control socket
 * additionally needs CAP_NET_ADMIN (except for MRT6_INIT itself).
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;	/* plain variants match any input mif */
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Enabling PIM implies assert processing as well. */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* Cannot retarget a socket that already controls a table. */
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

/* Read-only MRT6_* options: version, PIM mode and assert mode. */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;	/* mroute6 API version 3.5 */
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Copy out at most sizeof(int); the caller may ask for less. */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

/* SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: export per-mif and per-(S,G)
 * packet/byte counters to user space.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
			      : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
/* 32-bit layouts of the two ioctl request structs (compat_ulong_t counters). */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/* Compat mirror of ip6mr_ioctl() using the 32-bit struct layouts above. */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
			      : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

/* Netfilter FORWARD-hook continuation: bump forwarding stats and transmit. */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

/* Transmit one copy of @skb out of mif @vifi: register mifs get the whole
 * packet reported to pim6sd instead; otherwise route by destination, drop
 * the hop limit and hand off through NF_INET_FORWARD.  Consumes @skb.
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

/* Map a net_device to its mif index in @mrt, or -1 if not registered. */
static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* Replicate @skb to every mif listed in @cache whose TTL threshold the
 * packet clears; enforces the RPF (wrong-interface) check and raises PIM
 * asserts where configured.  Consumes @skb.  Runs under read-held mrt_lock.
 */
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;
	cache->mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough...
		       --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited WRONGMIF report to pim6sd. */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			/* Clone for every oif but the last; the original skb
			 * is spent on the final transmit below.
			 */
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

/* Data-path entry point: look up the (S,G) (or (*,G)/(*,*)) cache entry for
 * @skb and forward it, or queue it as unresolved when no entry exists yet.
 * Consumes @skb on every path.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		/* Fall back to a wildcard-source (*,G)/(*,*) entry. */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


/* Fill IIF/MULTIPATH/stats attributes for cache entry @c into a netlink
 * message being built in @skb.  Returns 1 on success, -ENOENT for an
 * unresolved entry (flagged RTNH_F_UNRESOLVED), -EMSGSIZE when out of room.
 */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	struct rta_mfc_stats mfcs;
	struct nlattr *mp_attr;
	struct rtnexthop *nhp;
	unsigned long lastuse;
	int ct;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS) {
		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
		return -ENOENT;
	}

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	/* Age reported as "time since last use", clamped at 0. */
	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
			      RTA_PAD))
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

/* RTM_GETROUTE support: describe the multicast route for the (src, dst) of
 * the route attached to @skb.  When no cache entry exists yet, a minimal
 * IPv6 header is synthesized and queued as unresolved so pim6sd resolves it.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Dummy header: only saddr/daddr matter for resolution;
		 * version 0 marks it as synthetic.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

/* Build one complete RTM_NEWROUTE/RTM_DELROUTE message for cache entry @c. */
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len = 128;
	rtm->rtm_src_len = 128;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache
	   is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Worst-case netlink message size for one route notification. */
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @mfc on the
 * RTNLGRP_IPV6_MROUTE group; best-effort (sets sk_err on failure).
 */
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

/* Worst-case size of an RTM_NEWCACHEREPORT message carrying @payloadlen
 * bytes of the original packet.
 */
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

/* Mirror a cache report (the mrt6msg built for pim6sd in @pkt) to netlink
 * listeners on RTNLGRP_IPV6_MROUTE_R, including the original packet bytes.
 */
static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

/* RTM_GETROUTE dump callback: walk every table, every hash line and the
 * unresolved queue, resuming from the (table, hash, entry) triple saved in
 * cb->args when the previous skb filled up.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save the resume point for the next dump invocation. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}