/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
31 * 32 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ 33 */ 34 35 #include <linux/skbuff.h> 36 #include <linux/rtnetlink.h> 37 #include <linux/ip.h> 38 #include <linux/in.h> 39 #include <linux/igmp.h> 40 #include <linux/inetdevice.h> 41 #include <linux/delay.h> 42 #include <linux/completion.h> 43 44 #include "ipoib.h" 45 46 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 47 static int mcast_debug_level; 48 49 module_param(mcast_debug_level, int, 0644); 50 MODULE_PARM_DESC(mcast_debug_level, 51 "Enable multicast debug tracing if > 0"); 52 #endif 53 54 static DECLARE_MUTEX(mcast_mutex); 55 56 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 57 struct ipoib_mcast { 58 struct ib_sa_mcmember_rec mcmember; 59 struct ipoib_ah *ah; 60 61 struct rb_node rb_node; 62 struct list_head list; 63 struct completion done; 64 65 int query_id; 66 struct ib_sa_query *query; 67 68 unsigned long created; 69 unsigned long backoff; 70 71 unsigned long flags; 72 unsigned char logcount; 73 74 struct list_head neigh_list; 75 76 struct sk_buff_head pkt_queue; 77 78 struct net_device *dev; 79 }; 80 81 struct ipoib_mcast_iter { 82 struct net_device *dev; 83 union ib_gid mgid; 84 unsigned long created; 85 unsigned int queuelen; 86 unsigned int complete; 87 unsigned int send_only; 88 }; 89 90 static void ipoib_mcast_free(struct ipoib_mcast *mcast) 91 { 92 struct net_device *dev = mcast->dev; 93 struct ipoib_dev_priv *priv = netdev_priv(dev); 94 struct ipoib_neigh *neigh, *tmp; 95 unsigned long flags; 96 LIST_HEAD(ah_list); 97 struct ipoib_ah *ah, *tah; 98 99 ipoib_dbg_mcast(netdev_priv(dev), 100 "deleting multicast group " IPOIB_GID_FMT "\n", 101 IPOIB_GID_ARG(mcast->mcmember.mgid)); 102 103 spin_lock_irqsave(&priv->lock, flags); 104 105 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { 106 if (neigh->ah) 107 list_add_tail(&neigh->ah->list, &ah_list); 108 *to_ipoib_neigh(neigh->neighbour) = NULL; 109 neigh->neighbour->ops->destructor = NULL; 110 
kfree(neigh); 111 } 112 113 spin_unlock_irqrestore(&priv->lock, flags); 114 115 list_for_each_entry_safe(ah, tah, &ah_list, list) 116 ipoib_put_ah(ah); 117 118 if (mcast->ah) 119 ipoib_put_ah(mcast->ah); 120 121 while (!skb_queue_empty(&mcast->pkt_queue)) { 122 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 123 124 skb->dev = dev; 125 dev_kfree_skb_any(skb); 126 } 127 128 kfree(mcast); 129 } 130 131 static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, 132 int can_sleep) 133 { 134 struct ipoib_mcast *mcast; 135 136 mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); 137 if (!mcast) 138 return NULL; 139 140 memset(mcast, 0, sizeof (*mcast)); 141 142 init_completion(&mcast->done); 143 144 mcast->dev = dev; 145 mcast->created = jiffies; 146 mcast->backoff = HZ; 147 mcast->logcount = 0; 148 149 INIT_LIST_HEAD(&mcast->list); 150 INIT_LIST_HEAD(&mcast->neigh_list); 151 skb_queue_head_init(&mcast->pkt_queue); 152 153 mcast->ah = NULL; 154 mcast->query = NULL; 155 156 return mcast; 157 } 158 159 static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) 160 { 161 struct ipoib_dev_priv *priv = netdev_priv(dev); 162 struct rb_node *n = priv->multicast_tree.rb_node; 163 164 while (n) { 165 struct ipoib_mcast *mcast; 166 int ret; 167 168 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 169 170 ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, 171 sizeof (union ib_gid)); 172 if (ret < 0) 173 n = n->rb_left; 174 else if (ret > 0) 175 n = n->rb_right; 176 else 177 return mcast; 178 } 179 180 return NULL; 181 } 182 183 static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) 184 { 185 struct ipoib_dev_priv *priv = netdev_priv(dev); 186 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; 187 188 while (*n) { 189 struct ipoib_mcast *tmcast; 190 int ret; 191 192 pn = *n; 193 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); 194 195 ret = memcmp(mcast->mcmember.mgid.raw, 
tmcast->mcmember.mgid.raw, 196 sizeof (union ib_gid)); 197 if (ret < 0) 198 n = &pn->rb_left; 199 else if (ret > 0) 200 n = &pn->rb_right; 201 else 202 return -EEXIST; 203 } 204 205 rb_link_node(&mcast->rb_node, pn, n); 206 rb_insert_color(&mcast->rb_node, &priv->multicast_tree); 207 208 return 0; 209 } 210 211 static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, 212 struct ib_sa_mcmember_rec *mcmember) 213 { 214 struct net_device *dev = mcast->dev; 215 struct ipoib_dev_priv *priv = netdev_priv(dev); 216 int ret; 217 218 mcast->mcmember = *mcmember; 219 220 /* Set the cached Q_Key before we attach if it's the broadcast group */ 221 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 222 sizeof (union ib_gid))) { 223 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 224 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 225 } 226 227 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 228 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 229 ipoib_warn(priv, "multicast group " IPOIB_GID_FMT 230 " already attached\n", 231 IPOIB_GID_ARG(mcast->mcmember.mgid)); 232 233 return 0; 234 } 235 236 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), 237 &mcast->mcmember.mgid); 238 if (ret < 0) { 239 ipoib_warn(priv, "couldn't attach QP to multicast group " 240 IPOIB_GID_FMT "\n", 241 IPOIB_GID_ARG(mcast->mcmember.mgid)); 242 243 clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); 244 return ret; 245 } 246 } 247 248 { 249 struct ib_ah_attr av = { 250 .dlid = be16_to_cpu(mcast->mcmember.mlid), 251 .port_num = priv->port, 252 .sl = mcast->mcmember.sl, 253 .ah_flags = IB_AH_GRH, 254 .grh = { 255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 256 .hop_limit = mcast->mcmember.hop_limit, 257 .sgid_index = 0, 258 .traffic_class = mcast->mcmember.traffic_class 259 } 260 }; 261 262 av.grh.dgid = mcast->mcmember.mgid; 263 264 if (ib_sa_rate_enum_to_int(mcast->mcmember.rate) > 0) 265 av.static_rate = (2 * 
priv->local_rate - 266 ib_sa_rate_enum_to_int(mcast->mcmember.rate) - 1) / 267 (priv->local_rate ? priv->local_rate : 1); 268 269 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", 270 av.static_rate, priv->local_rate, 271 ib_sa_rate_enum_to_int(mcast->mcmember.rate)); 272 273 mcast->ah = ipoib_create_ah(dev, priv->pd, &av); 274 if (!mcast->ah) { 275 ipoib_warn(priv, "ib_address_create failed\n"); 276 } else { 277 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT 278 " AV %p, LID 0x%04x, SL %d\n", 279 IPOIB_GID_ARG(mcast->mcmember.mgid), 280 mcast->ah->ah, 281 be16_to_cpu(mcast->mcmember.mlid), 282 mcast->mcmember.sl); 283 } 284 } 285 286 /* actually send any queued packets */ 287 while (!skb_queue_empty(&mcast->pkt_queue)) { 288 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 289 290 skb->dev = dev; 291 292 if (!skb->dst || !skb->dst->neighbour) { 293 /* put pseudoheader back on for next time */ 294 skb_push(skb, sizeof (struct ipoib_pseudoheader)); 295 } 296 297 if (dev_queue_xmit(skb)) 298 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 299 } 300 301 return 0; 302 } 303 304 static void 305 ipoib_mcast_sendonly_join_complete(int status, 306 struct ib_sa_mcmember_rec *mcmember, 307 void *mcast_ptr) 308 { 309 struct ipoib_mcast *mcast = mcast_ptr; 310 struct net_device *dev = mcast->dev; 311 312 if (!status) 313 ipoib_mcast_join_finish(mcast, mcmember); 314 else { 315 if (mcast->logcount++ < 20) 316 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " 317 IPOIB_GID_FMT ", status %d\n", 318 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 319 320 /* Flush out any queued packets */ 321 while (!skb_queue_empty(&mcast->pkt_queue)) { 322 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 323 324 skb->dev = dev; 325 326 dev_kfree_skb_any(skb); 327 } 328 329 /* Clear the busy flag so we try again */ 330 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 331 } 332 333 complete(&mcast->done); 334 } 335 336 static int 
ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 337 { 338 struct net_device *dev = mcast->dev; 339 struct ipoib_dev_priv *priv = netdev_priv(dev); 340 struct ib_sa_mcmember_rec rec = { 341 #if 0 /* Some SMs don't support send-only yet */ 342 .join_state = 4 343 #else 344 .join_state = 1 345 #endif 346 }; 347 int ret = 0; 348 349 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 350 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); 351 return -ENODEV; 352 } 353 354 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { 355 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); 356 return -EBUSY; 357 } 358 359 rec.mgid = mcast->mcmember.mgid; 360 rec.port_gid = priv->local_gid; 361 rec.pkey = be16_to_cpu(priv->pkey); 362 363 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, 364 IB_SA_MCMEMBER_REC_MGID | 365 IB_SA_MCMEMBER_REC_PORT_GID | 366 IB_SA_MCMEMBER_REC_PKEY | 367 IB_SA_MCMEMBER_REC_JOIN_STATE, 368 1000, GFP_ATOMIC, 369 ipoib_mcast_sendonly_join_complete, 370 mcast, &mcast->query); 371 if (ret < 0) { 372 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", 373 ret); 374 } else { 375 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT 376 ", starting join\n", 377 IPOIB_GID_ARG(mcast->mcmember.mgid)); 378 379 mcast->query_id = ret; 380 } 381 382 return ret; 383 } 384 385 static void ipoib_mcast_join_complete(int status, 386 struct ib_sa_mcmember_rec *mcmember, 387 void *mcast_ptr) 388 { 389 struct ipoib_mcast *mcast = mcast_ptr; 390 struct net_device *dev = mcast->dev; 391 struct ipoib_dev_priv *priv = netdev_priv(dev); 392 393 ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT 394 " (status %d)\n", 395 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 396 397 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { 398 mcast->backoff = HZ; 399 down(&mcast_mutex); 400 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 401 queue_work(ipoib_workqueue, &priv->mcast_task); 402 up(&mcast_mutex); 403 
complete(&mcast->done); 404 return; 405 } 406 407 if (status == -EINTR) { 408 complete(&mcast->done); 409 return; 410 } 411 412 if (status && mcast->logcount++ < 20) { 413 if (status == -ETIMEDOUT || status == -EINTR) { 414 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT 415 ", status %d\n", 416 IPOIB_GID_ARG(mcast->mcmember.mgid), 417 status); 418 } else { 419 ipoib_warn(priv, "multicast join failed for " 420 IPOIB_GID_FMT ", status %d\n", 421 IPOIB_GID_ARG(mcast->mcmember.mgid), 422 status); 423 } 424 } 425 426 mcast->backoff *= 2; 427 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 428 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 429 430 mcast->query = NULL; 431 432 down(&mcast_mutex); 433 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 434 if (status == -ETIMEDOUT) 435 queue_work(ipoib_workqueue, &priv->mcast_task); 436 else 437 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 438 mcast->backoff * HZ); 439 } else 440 complete(&mcast->done); 441 up(&mcast_mutex); 442 443 return; 444 } 445 446 static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, 447 int create) 448 { 449 struct ipoib_dev_priv *priv = netdev_priv(dev); 450 struct ib_sa_mcmember_rec rec = { 451 .join_state = 1 452 }; 453 ib_sa_comp_mask comp_mask; 454 int ret = 0; 455 456 ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", 457 IPOIB_GID_ARG(mcast->mcmember.mgid)); 458 459 rec.mgid = mcast->mcmember.mgid; 460 rec.port_gid = priv->local_gid; 461 rec.pkey = be16_to_cpu(priv->pkey); 462 463 comp_mask = 464 IB_SA_MCMEMBER_REC_MGID | 465 IB_SA_MCMEMBER_REC_PORT_GID | 466 IB_SA_MCMEMBER_REC_PKEY | 467 IB_SA_MCMEMBER_REC_JOIN_STATE; 468 469 if (create) { 470 comp_mask |= 471 IB_SA_MCMEMBER_REC_QKEY | 472 IB_SA_MCMEMBER_REC_SL | 473 IB_SA_MCMEMBER_REC_FLOW_LABEL | 474 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 475 476 rec.qkey = priv->broadcast->mcmember.qkey; 477 rec.sl = priv->broadcast->mcmember.sl; 478 rec.flow_label = priv->broadcast->mcmember.flow_label; 
479 rec.traffic_class = priv->broadcast->mcmember.traffic_class; 480 } 481 482 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, 483 mcast->backoff * 1000, GFP_ATOMIC, 484 ipoib_mcast_join_complete, 485 mcast, &mcast->query); 486 487 if (ret < 0) { 488 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); 489 490 mcast->backoff *= 2; 491 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 492 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 493 494 down(&mcast_mutex); 495 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 496 queue_delayed_work(ipoib_workqueue, 497 &priv->mcast_task, 498 mcast->backoff); 499 up(&mcast_mutex); 500 } else 501 mcast->query_id = ret; 502 } 503 504 void ipoib_mcast_join_task(void *dev_ptr) 505 { 506 struct net_device *dev = dev_ptr; 507 struct ipoib_dev_priv *priv = netdev_priv(dev); 508 509 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 510 return; 511 512 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 513 ipoib_warn(priv, "ib_gid_entry_get() failed\n"); 514 else 515 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 516 517 { 518 struct ib_port_attr attr; 519 520 if (!ib_query_port(priv->ca, priv->port, &attr)) { 521 priv->local_lid = attr.lid; 522 priv->local_rate = attr.active_speed * 523 ib_width_enum_to_int(attr.active_width); 524 } else 525 ipoib_warn(priv, "ib_query_port failed\n"); 526 } 527 528 if (!priv->broadcast) { 529 priv->broadcast = ipoib_mcast_alloc(dev, 1); 530 if (!priv->broadcast) { 531 ipoib_warn(priv, "failed to allocate broadcast group\n"); 532 down(&mcast_mutex); 533 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 534 queue_delayed_work(ipoib_workqueue, 535 &priv->mcast_task, HZ); 536 up(&mcast_mutex); 537 return; 538 } 539 540 memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 541 sizeof (union ib_gid)); 542 543 spin_lock_irq(&priv->lock); 544 __ipoib_mcast_add(dev, priv->broadcast); 545 spin_unlock_irq(&priv->lock); 546 } 547 548 if 
(!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 549 ipoib_mcast_join(dev, priv->broadcast, 0); 550 return; 551 } 552 553 while (1) { 554 struct ipoib_mcast *mcast = NULL; 555 556 spin_lock_irq(&priv->lock); 557 list_for_each_entry(mcast, &priv->multicast_list, list) { 558 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) 559 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) 560 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 561 /* Found the next unjoined group */ 562 break; 563 } 564 } 565 spin_unlock_irq(&priv->lock); 566 567 if (&mcast->list == &priv->multicast_list) { 568 /* All done */ 569 break; 570 } 571 572 ipoib_mcast_join(dev, mcast, 1); 573 return; 574 } 575 576 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - 577 IPOIB_ENCAP_LEN; 578 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 579 580 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 581 582 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 583 netif_carrier_on(dev); 584 } 585 586 int ipoib_mcast_start_thread(struct net_device *dev) 587 { 588 struct ipoib_dev_priv *priv = netdev_priv(dev); 589 590 ipoib_dbg_mcast(priv, "starting multicast thread\n"); 591 592 down(&mcast_mutex); 593 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) 594 queue_work(ipoib_workqueue, &priv->mcast_task); 595 up(&mcast_mutex); 596 597 return 0; 598 } 599 600 int ipoib_mcast_stop_thread(struct net_device *dev) 601 { 602 struct ipoib_dev_priv *priv = netdev_priv(dev); 603 struct ipoib_mcast *mcast; 604 605 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 606 607 down(&mcast_mutex); 608 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 609 cancel_delayed_work(&priv->mcast_task); 610 up(&mcast_mutex); 611 612 flush_workqueue(ipoib_workqueue); 613 614 if (priv->broadcast && priv->broadcast->query) { 615 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); 616 priv->broadcast->query = NULL; 617 ipoib_dbg_mcast(priv, "waiting for bcast\n"); 
618 wait_for_completion(&priv->broadcast->done); 619 } 620 621 list_for_each_entry(mcast, &priv->multicast_list, list) { 622 if (mcast->query) { 623 ib_sa_cancel_query(mcast->query_id, mcast->query); 624 mcast->query = NULL; 625 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", 626 IPOIB_GID_ARG(mcast->mcmember.mgid)); 627 wait_for_completion(&mcast->done); 628 } 629 } 630 631 return 0; 632 } 633 634 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 635 { 636 struct ipoib_dev_priv *priv = netdev_priv(dev); 637 struct ib_sa_mcmember_rec rec = { 638 .join_state = 1 639 }; 640 int ret = 0; 641 642 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) 643 return 0; 644 645 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", 646 IPOIB_GID_ARG(mcast->mcmember.mgid)); 647 648 rec.mgid = mcast->mcmember.mgid; 649 rec.port_gid = priv->local_gid; 650 rec.pkey = be16_to_cpu(priv->pkey); 651 652 /* Remove ourselves from the multicast group */ 653 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 654 &mcast->mcmember.mgid); 655 if (ret) 656 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 657 658 /* 659 * Just make one shot at leaving and don't wait for a reply; 660 * if we fail, too bad. 661 */ 662 ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, 663 IB_SA_MCMEMBER_REC_MGID | 664 IB_SA_MCMEMBER_REC_PORT_GID | 665 IB_SA_MCMEMBER_REC_PKEY | 666 IB_SA_MCMEMBER_REC_JOIN_STATE, 667 0, GFP_ATOMIC, NULL, 668 mcast, &mcast->query); 669 if (ret < 0) 670 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " 671 "for leave (result = %d)\n", ret); 672 673 return 0; 674 } 675 676 void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, 677 struct sk_buff *skb) 678 { 679 struct ipoib_dev_priv *priv = netdev_priv(dev); 680 struct ipoib_mcast *mcast; 681 682 /* 683 * We can only be called from ipoib_start_xmit, so we're 684 * inside tx_lock -- no need to save/restore flags. 
685 */ 686 spin_lock(&priv->lock); 687 688 mcast = __ipoib_mcast_find(dev, mgid); 689 if (!mcast) { 690 /* Let's create a new send only group now */ 691 ipoib_dbg_mcast(priv, "setting up send only multicast group for " 692 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); 693 694 mcast = ipoib_mcast_alloc(dev, 0); 695 if (!mcast) { 696 ipoib_warn(priv, "unable to allocate memory for " 697 "multicast structure\n"); 698 dev_kfree_skb_any(skb); 699 goto out; 700 } 701 702 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 703 mcast->mcmember.mgid = *mgid; 704 __ipoib_mcast_add(dev, mcast); 705 list_add_tail(&mcast->list, &priv->multicast_list); 706 } 707 708 if (!mcast->ah) { 709 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) 710 skb_queue_tail(&mcast->pkt_queue, skb); 711 else 712 dev_kfree_skb_any(skb); 713 714 if (mcast->query) 715 ipoib_dbg_mcast(priv, "no address vector, " 716 "but multicast join already started\n"); 717 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 718 ipoib_mcast_sendonly_join(mcast); 719 720 /* 721 * If lookup completes between here and out:, don't 722 * want to send packet twice. 
723 */ 724 mcast = NULL; 725 } 726 727 out: 728 if (mcast && mcast->ah) { 729 if (skb->dst && 730 skb->dst->neighbour && 731 !*to_ipoib_neigh(skb->dst->neighbour)) { 732 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 733 734 if (neigh) { 735 kref_get(&mcast->ah->ref); 736 neigh->ah = mcast->ah; 737 neigh->neighbour = skb->dst->neighbour; 738 *to_ipoib_neigh(skb->dst->neighbour) = neigh; 739 list_add_tail(&neigh->list, &mcast->neigh_list); 740 } 741 } 742 743 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 744 } 745 746 spin_unlock(&priv->lock); 747 } 748 749 void ipoib_mcast_dev_flush(struct net_device *dev) 750 { 751 struct ipoib_dev_priv *priv = netdev_priv(dev); 752 LIST_HEAD(remove_list); 753 struct ipoib_mcast *mcast, *tmcast, *nmcast; 754 unsigned long flags; 755 756 ipoib_dbg_mcast(priv, "flushing multicast list\n"); 757 758 spin_lock_irqsave(&priv->lock, flags); 759 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 760 nmcast = ipoib_mcast_alloc(dev, 0); 761 if (nmcast) { 762 nmcast->flags = 763 mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY); 764 765 nmcast->mcmember.mgid = mcast->mcmember.mgid; 766 767 /* Add the new group in before the to-be-destroyed group */ 768 list_add_tail(&nmcast->list, &mcast->list); 769 list_del_init(&mcast->list); 770 771 rb_replace_node(&mcast->rb_node, &nmcast->rb_node, 772 &priv->multicast_tree); 773 774 list_add_tail(&mcast->list, &remove_list); 775 } else { 776 ipoib_warn(priv, "could not reallocate multicast group " 777 IPOIB_GID_FMT "\n", 778 IPOIB_GID_ARG(mcast->mcmember.mgid)); 779 } 780 } 781 782 if (priv->broadcast) { 783 nmcast = ipoib_mcast_alloc(dev, 0); 784 if (nmcast) { 785 nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid; 786 787 rb_replace_node(&priv->broadcast->rb_node, 788 &nmcast->rb_node, 789 &priv->multicast_tree); 790 791 list_add_tail(&priv->broadcast->list, &remove_list); 792 } 793 794 priv->broadcast = nmcast; 795 } 796 797 
spin_unlock_irqrestore(&priv->lock, flags); 798 799 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 800 ipoib_mcast_leave(dev, mcast); 801 ipoib_mcast_free(mcast); 802 } 803 } 804 805 void ipoib_mcast_dev_down(struct net_device *dev) 806 { 807 struct ipoib_dev_priv *priv = netdev_priv(dev); 808 unsigned long flags; 809 810 /* Delete broadcast since it will be recreated */ 811 if (priv->broadcast) { 812 ipoib_dbg_mcast(priv, "deleting broadcast group\n"); 813 814 spin_lock_irqsave(&priv->lock, flags); 815 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); 816 spin_unlock_irqrestore(&priv->lock, flags); 817 ipoib_mcast_leave(dev, priv->broadcast); 818 ipoib_mcast_free(priv->broadcast); 819 priv->broadcast = NULL; 820 } 821 } 822 823 void ipoib_mcast_restart_task(void *dev_ptr) 824 { 825 struct net_device *dev = dev_ptr; 826 struct ipoib_dev_priv *priv = netdev_priv(dev); 827 struct dev_mc_list *mclist; 828 struct ipoib_mcast *mcast, *tmcast; 829 LIST_HEAD(remove_list); 830 unsigned long flags; 831 832 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 833 834 ipoib_mcast_stop_thread(dev); 835 836 spin_lock_irqsave(&priv->lock, flags); 837 838 /* 839 * Unfortunately, the networking core only gives us a list of all of 840 * the multicast hardware addresses. 
We need to figure out which ones 841 * are new and which ones have been removed 842 */ 843 844 /* Clear out the found flag */ 845 list_for_each_entry(mcast, &priv->multicast_list, list) 846 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 847 848 /* Mark all of the entries that are found or don't exist */ 849 for (mclist = dev->mc_list; mclist; mclist = mclist->next) { 850 union ib_gid mgid; 851 852 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); 853 854 /* Add in the P_Key */ 855 mgid.raw[4] = (priv->pkey >> 8) & 0xff; 856 mgid.raw[5] = priv->pkey & 0xff; 857 858 mcast = __ipoib_mcast_find(dev, &mgid); 859 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 860 struct ipoib_mcast *nmcast; 861 862 /* Not found or send-only group, let's add a new entry */ 863 ipoib_dbg_mcast(priv, "adding multicast entry for mgid " 864 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); 865 866 nmcast = ipoib_mcast_alloc(dev, 0); 867 if (!nmcast) { 868 ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); 869 continue; 870 } 871 872 set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); 873 874 nmcast->mcmember.mgid = mgid; 875 876 if (mcast) { 877 /* Destroy the send only entry */ 878 list_del(&mcast->list); 879 list_add_tail(&mcast->list, &remove_list); 880 881 rb_replace_node(&mcast->rb_node, 882 &nmcast->rb_node, 883 &priv->multicast_tree); 884 } else 885 __ipoib_mcast_add(dev, nmcast); 886 887 list_add_tail(&nmcast->list, &priv->multicast_list); 888 } 889 890 if (mcast) 891 set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 892 } 893 894 /* Remove all of the entries don't exist anymore */ 895 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 896 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && 897 !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 898 ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", 899 IPOIB_GID_ARG(mcast->mcmember.mgid)); 900 901 rb_erase(&mcast->rb_node, &priv->multicast_tree); 902 
903 /* Move to the remove list */ 904 list_del(&mcast->list); 905 list_add_tail(&mcast->list, &remove_list); 906 } 907 } 908 spin_unlock_irqrestore(&priv->lock, flags); 909 910 /* We have to cancel outside of the spinlock */ 911 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 912 ipoib_mcast_leave(mcast->dev, mcast); 913 ipoib_mcast_free(mcast); 914 } 915 916 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 917 ipoib_mcast_start_thread(dev); 918 } 919 920 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) 921 { 922 struct ipoib_mcast_iter *iter; 923 924 iter = kmalloc(sizeof *iter, GFP_KERNEL); 925 if (!iter) 926 return NULL; 927 928 iter->dev = dev; 929 memset(iter->mgid.raw, 0, sizeof iter->mgid); 930 931 if (ipoib_mcast_iter_next(iter)) { 932 ipoib_mcast_iter_free(iter); 933 return NULL; 934 } 935 936 return iter; 937 } 938 939 void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter) 940 { 941 kfree(iter); 942 } 943 944 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) 945 { 946 struct ipoib_dev_priv *priv = netdev_priv(iter->dev); 947 struct rb_node *n; 948 struct ipoib_mcast *mcast; 949 int ret = 1; 950 951 spin_lock_irq(&priv->lock); 952 953 n = rb_first(&priv->multicast_tree); 954 955 while (n) { 956 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 957 958 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, 959 sizeof (union ib_gid)) < 0) { 960 iter->mgid = mcast->mcmember.mgid; 961 iter->created = mcast->created; 962 iter->queuelen = skb_queue_len(&mcast->pkt_queue); 963 iter->complete = !!mcast->ah; 964 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); 965 966 ret = 0; 967 968 break; 969 } 970 971 n = rb_next(n); 972 } 973 974 spin_unlock_irq(&priv->lock); 975 976 return ret; 977 } 978 979 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, 980 union ib_gid *mgid, 981 unsigned long *created, 982 unsigned int *queuelen, 983 unsigned int *complete, 984 unsigned int *send_only) 985 { 986 
*mgid = iter->mgid; 987 *created = iter->created; 988 *queuelen = iter->queuelen; 989 *complete = iter->complete; 990 *send_only = iter->send_only; 991 } 992