/*
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/random.h>

#include <rdma/ib_cache.h>
#include "sa.h"

static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);

static struct ib_client mcast_client = {
	.name   = "ib_multicast",
	.add    = mcast_add_one,
	.remove = mcast_remove_one
};

static struct ib_sa_client sa_client;
static struct workqueue_struct *mcast_wq;
static union ib_gid mgid0;

struct mcast_device;

struct mcast_port {
	struct mcast_device *dev;
	spinlock_t lock;
	struct rb_root table;
	atomic_t refcount;
	struct completion comp;
	u8 port_num;
};

struct mcast_device {
	struct ib_device *device;
	struct ib_event_handler event_handler;
	int start_port;
	int end_port;
	struct mcast_port port[0];
};

enum mcast_state {
	MCAST_JOINING,
	MCAST_MEMBER,
	MCAST_ERROR,
};

enum mcast_group_state {
	MCAST_IDLE,
	MCAST_BUSY,
	MCAST_GROUP_ERROR,
	MCAST_PKEY_EVENT
};

enum {
	MCAST_INVALID_PKEY_INDEX = 0xFFFF
};

struct mcast_member;

struct mcast_group {
	struct ib_sa_mcmember_rec rec;
	struct rb_node node;
	struct mcast_port *port;
	spinlock_t lock;
	struct work_struct work;
	struct list_head pending_list;
	struct list_head active_list;
	struct mcast_member *last_join;
	int members[3];
	atomic_t refcount;
	enum mcast_group_state state;
	struct ib_sa_query *query;
	int query_id;
	u16 pkey_index;
};

struct mcast_member {
	struct ib_sa_multicast multicast;
	struct ib_sa_client *client;
	struct mcast_group *group;
	struct list_head list;
	enum mcast_state state;
	atomic_t refcount;
	struct completion comp;
};

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context);
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context);

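/*
 * Descriptive note (added for clarity): each port tracks its multicast
 * groups in an rb-tree (port->table) keyed by the group's MGID.
 * mcast_find() and mcast_insert() below walk that tree under port->lock.
 */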
static struct mcast_group *mcast_find(struct mcast_port *port,
				      union ib_gid *mgid)
{
	struct rb_node *node = port->table.rb_node;
	struct mcast_group *group;
	int ret;

	while (node) {
		group = rb_entry(node, struct mcast_group, node);
		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
		if (!ret)
			return group;

		if (ret < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct mcast_group *mcast_insert(struct mcast_port *port,
					struct mcast_group *group,
					int allow_duplicates)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct mcast_group *cur_group;
	int ret;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct mcast_group, node);

		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
			     sizeof group->rec.mgid);
		if (ret < 0)
			link = &(*link)->rb_left;
		else if (ret > 0)
			link = &(*link)->rb_right;
		else if (allow_duplicates)
			link = &(*link)->rb_left;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct mcast_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct mcast_group *group)
{
	struct mcast_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct mcast_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

static void queue_join(struct mcast_member *member)
{
	struct mcast_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add_tail(&member->list, &group->pending_list);
	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		atomic_inc(&group->refcount);
		queue_work(mcast_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

/*
 * A multicast group has three types of members: full member, non member, and
 * send only member.  We need to keep track of the number of members of each
 * type based on their join state.  Adjust the number of members that belong
 * to the specified join states.
 */
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
	int i;

	for (i = 0; i < 3; i++, join_state >>= 1)
		if (join_state & 0x1)
			group->members[i] += inc;
}

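/*
 * Example (illustration only): join_state is a bit mask with bit 0 for full
 * member, bit 1 for non member and bit 2 for send only member, so a join
 * with join_state 0x5 increments members[0] and members[2].  If the group's
 * join state with the SA is 0x7 but members[1] later drops to zero, only
 * the non member join state (0x2) still needs to be left.
 */
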
/*
 * If a multicast group has zero members left for a particular join state, but
 * the group is still a member with the SA, we need to leave that join state.
 * Determine which join states we still belong to, but that do not have any
 * active members.
 */
static u8 get_leave_state(struct mcast_group *group)
{
	u8 leave_state = 0;
	int i;

	for (i = 0; i < 3; i++)
		if (!group->members[i])
			leave_state |= (0x1 << i);

	return leave_state & group->rec.join_state;
}

static int check_selector(ib_sa_comp_mask comp_mask,
			  ib_sa_comp_mask selector_mask,
			  ib_sa_comp_mask value_mask,
			  u8 selector, u8 src_value, u8 dst_value)
{
	int err;

	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
		return 0;

	switch (selector) {
	case IB_SA_GT:
		err = (src_value <= dst_value);
		break;
	case IB_SA_LT:
		err = (src_value >= dst_value);
		break;
	case IB_SA_EQ:
		err = (src_value != dst_value);
		break;
	default:
		err = 0;
		break;
	}

	return err;
}

static int cmp_rec(struct ib_sa_mcmember_rec *src,
		   struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
	/* MGID must already match */

	if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
	    memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
		return -EINVAL;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
			   IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
			   src->mtu, dst->mtu))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
	    src->traffic_class != dst->traffic_class)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
		return -EINVAL;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
			   IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
			   src->rate, dst->rate))
		return -EINVAL;
	if (check_selector(comp_mask,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
			   dst->packet_life_time_selector,
			   src->packet_life_time, dst->packet_life_time))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
	    src->flow_label != dst->flow_label)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
	    src->hop_limit != dst->hop_limit)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
		return -EINVAL;

	/* join_state checked separately, proxy_join ignored */

	return 0;
}

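/*
 * Example of the selector checks above (illustration only): if a member's
 * request sets IB_SA_MCMEMBER_REC_MTU_SELECTOR and IB_SA_MCMEMBER_REC_MTU
 * with mtu_selector = IB_SA_GT, cmp_rec() only accepts the existing group
 * when the group's MTU is strictly greater than the requested value;
 * IB_SA_EQ requires an exact match, and a clear selector or value bit in
 * comp_mask skips the check entirely.
 */
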
static int send_join(struct mcast_group *group, struct mcast_member *member)
{
	struct mcast_port *port = group->port;
	int ret;

	group->last_join = member;
	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_MGMT_METHOD_SET,
				       &member->multicast.rec,
				       member->multicast.comp_mask,
				       3000, GFP_KERNEL, join_handler, group,
				       &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static int send_leave(struct mcast_group *group, u8 leave_state)
{
	struct mcast_port *port = group->port;
	struct ib_sa_mcmember_rec rec;
	int ret;

	rec = group->rec;
	rec.join_state = leave_state;

	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_SA_METHOD_DELETE, &rec,
				       IB_SA_MCMEMBER_REC_MGID |
				       IB_SA_MCMEMBER_REC_PORT_GID |
				       IB_SA_MCMEMBER_REC_JOIN_STATE,
				       3000, GFP_KERNEL, leave_handler,
				       group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static void join_group(struct mcast_group *group, struct mcast_member *member,
		       u8 join_state)
{
	member->state = MCAST_MEMBER;
	adjust_membership(group, join_state, 1);
	group->rec.join_state |= join_state;
	member->multicast.rec = group->rec;
	member->multicast.rec.join_state = join_state;
	list_move(&member->list, &group->active_list);
}

static int fail_join(struct mcast_group *group, struct mcast_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->multicast.callback(status, &member->multicast);
}

static void process_group_error(struct mcast_group *group)
{
	struct mcast_member *member;
	int ret = 0;
	u16 pkey_index;

	if (group->state == MCAST_PKEY_EVENT)
		ret = ib_find_pkey(group->port->dev->device,
				   group->port->port_num,
				   be16_to_cpu(group->rec.pkey), &pkey_index);

	spin_lock_irq(&group->lock);
	if (group->state == MCAST_PKEY_EVENT && !ret &&
	    group->pkey_index == pkey_index)
		goto out;

	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct mcast_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		adjust_membership(group, member->multicast.rec.join_state, -1);
		member->state = MCAST_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->multicast.callback(-ENETRESET,
						 &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	group->rec.join_state = 0;
out:
	group->state = MCAST_BUSY;
	spin_unlock_irq(&group->lock);
}

static void mcast_work_handler(struct work_struct *work)
{
	struct mcast_group *group;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int status, ret;
	u8 join_state;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       (group->state != MCAST_BUSY)) {

		if (group->state != MCAST_BUSY) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct mcast_member, list);
		multicast = &member->multicast;
		join_state = multicast->rec.join_state;
		atomic_inc(&member->refcount);

		if (join_state == (group->rec.join_state & join_state)) {
			status = cmp_rec(&group->rec, &multicast->rec,
					 multicast->comp_mask);
			if (!status)
				join_group(group, member, join_state);
			else
				list_del_init(&member->list);
			spin_unlock_irq(&group->lock);
			ret = multicast->callback(status, multicast);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_join(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	join_state = get_leave_state(group);
	if (join_state) {
		group->rec.join_state &= ~join_state;
		spin_unlock_irq(&group->lock);
		if (send_leave(group, join_state))
			goto retest;
	} else {
		group->state = MCAST_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

/*
 * Fail a join request if it is still active - at the head of the pending queue.
 */
static void process_join_error(struct mcast_group *group, int status)
{
	struct mcast_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct mcast_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->multicast.callback(status, &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
	} else
		spin_unlock_irq(&group->lock);
}

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context)
{
	struct mcast_group *group = context;
	u16 pkey_index = MCAST_INVALID_PKEY_INDEX;

	if (status)
		process_join_error(group, status);
	else {
		ib_find_pkey(group->port->dev->device, group->port->port_num,
			     be16_to_cpu(rec->pkey), &pkey_index);

		spin_lock_irq(&group->port->lock);
		group->rec = *rec;
		if (group->state == MCAST_BUSY &&
		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
			group->pkey_index = pkey_index;
		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
			rb_erase(&group->node, &group->port->table);
			mcast_insert(group->port, group, 1);
		}
		spin_unlock_irq(&group->port->lock);
	}
	mcast_work_handler(&group->work);
}

static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context)
{
	struct mcast_group *group = context;

	mcast_work_handler(&group->work);
}

static struct mcast_group *acquire_group(struct mcast_port *port,
					 union ib_gid *mgid, gfp_t gfp_mask)
{
	struct mcast_group *group, *cur_group;
	unsigned long flags;
	int is_mgid0;

	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
	if (!is_mgid0) {
		spin_lock_irqsave(&port->lock, flags);
		group = mcast_find(port, mgid);
		if (group)
			goto found;
		spin_unlock_irqrestore(&port->lock, flags);
	}

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->port = port;
	group->rec.mgid = *mgid;
	group->pkey_index = MCAST_INVALID_PKEY_INDEX;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_WORK(&group->work, mcast_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = mcast_insert(port, group, is_mgid0);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

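/*
 * Illustrative usage sketch (not part of this file; names such as
 * my_join_done, my_sa_client and ctx are hypothetical): a consumer like
 * IPoIB fills an ib_sa_mcmember_rec with at least the MGID, port GID, P_Key
 * and join_state, calls ib_sa_join_multicast() with a completion callback,
 * and later releases the membership with ib_sa_free_multicast():
 *
 *	static int my_join_done(int status, struct ib_sa_multicast *mc)
 *	{
 *		if (status)
 *			return 0;	(join failed, nothing to undo)
 *		(mc->rec now holds the SA's view of the group; a real
 *		 consumer would attach its QP and build an AH here)
 *		return 0;	(returning non-zero frees the membership)
 *	}
 *
 *	mc = ib_sa_join_multicast(&my_sa_client, device, port_num, &rec,
 *				  comp_mask, GFP_KERNEL, my_join_done, ctx);
 *	if (IS_ERR(mc))
 *		return PTR_ERR(mc);
 */
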
/*
 * We serialize all join requests to a single group to make our lives much
 * easier.  Otherwise, two users could try to join the same group
 * simultaneously, with different configurations, one could leave while the
 * join is in progress, etc., which makes locking around error recovery
 * difficult.
 */
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
		     struct ib_device *device, u8 port_num,
		     struct ib_sa_mcmember_rec *rec,
		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
		     int (*callback)(int status,
				     struct ib_sa_multicast *multicast),
		     void *context)
{
	struct mcast_device *dev;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int ret;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kmalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->multicast.rec = *rec;
	member->multicast.comp_mask = comp_mask;
	member->multicast.callback = callback;
	member->multicast.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = MCAST_JOINING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      &rec->mgid, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the multicast structure in their callback.  They
	 * could then free the multicast structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	multicast = &member->multicast;
	queue_join(member);
	return multicast;

err:
	ib_sa_client_put(client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_join_multicast);

void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
{
	struct mcast_member *member;
	struct mcast_group *group;

	member = container_of(multicast, struct mcast_member, multicast);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == MCAST_MEMBER)
		adjust_membership(group, multicast->rec.join_state, -1);

	list_del_init(&member->list);

	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(mcast_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_free_multicast);

int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	struct mcast_group *group;
	unsigned long flags;
	int ret = 0;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return -ENODEV;

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irqsave(&port->lock, flags);
	group = mcast_find(port, mgid);
	if (group)
		*rec = group->rec;
	else
		ret = -EADDRNOTAVAIL;
	spin_unlock_irqrestore(&port->lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);

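/*
 * Descriptive note (added for clarity): the helper below builds address
 * handle attributes from a group's MCMemberRecord so that a consumer which
 * has joined the group, and attached its QP to it, can create an AH for
 * sending to the group.
 */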
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
			     struct ib_sa_mcmember_rec *rec,
			     struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;
	u8 p;

	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
	if (ret)
		return ret;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->mlid);
	ah_attr->sl = rec->sl;
	ah_attr->port_num = port_num;
	ah_attr->static_rate = rec->rate;

	ah_attr->ah_flags = IB_AH_GRH;
	ah_attr->grh.dgid = rec->mgid;

	ah_attr->grh.sgid_index = (u8) gid_index;
	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
	ah_attr->grh.hop_limit = rec->hop_limit;
	ah_attr->grh.traffic_class = rec->traffic_class;

	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);

static void mcast_groups_event(struct mcast_port *port,
			       enum mcast_group_state state)
{
	struct mcast_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct mcast_group, node);
		spin_lock(&group->lock);
		if (group->state == MCAST_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(mcast_wq, &group->work);
		}
		if (group->state != MCAST_GROUP_ERROR)
			group->state = state;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void mcast_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	struct mcast_device *dev;
	int index;

	dev = container_of(handler, struct mcast_device, event_handler);
	index = event->element.port_num - dev->start_port;

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
		break;
	case IB_EVENT_PKEY_CHANGE:
		mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
		break;
	default:
		break;
	}
}

static void mcast_add_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	int i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		dev->start_port = dev->end_port = 0;
	else {
		dev->start_port = 1;
		dev->end_port = device->phys_port_cnt;
	}

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
	}

	dev->device = device;
	ib_set_client_data(device, &mcast_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

static void mcast_remove_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	int i;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(mcast_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		deref_port(port);
		wait_for_completion(&port->comp);
	}

	kfree(dev);
}

int mcast_init(void)
{
	int ret;

	mcast_wq = create_singlethread_workqueue("ib_mcast");
	if (!mcast_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&mcast_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
	return ret;
}

void mcast_cleanup(void)
{
	ib_unregister_client(&mcast_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
}