/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipoib.h"

#include <linux/module.h>

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>

#include <linux/if_arp.h>	/* For ARPHRD_xxx */

#include <linux/ip.h>
#include <linux/in.h>

#include <net/dst.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

static int lro;
module_param(lro, bool, 0444);
MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");

static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
module_param(lro_max_aggr, int, 0644);
MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
		"(default = 64)");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif

struct ipoib_path_iter {
	struct net_device *dev;
	struct ipoib_path  path;
};

static const u8 ipv4_bcast_addr[] = {
	0x00, 0xff, 0xff, 0xff,
	0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};

struct workqueue_struct *ipoib_workqueue;

struct ib_sa_client ipoib_sa_client;

static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device);

static struct ib_client ipoib_client = {
	.name   = "ipoib",
	.add    = ipoib_add_one,
	.remove = ipoib_remove_one
};

int ipoib_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "bringing up interface\n");

	napi_enable(&priv->napi);
	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	if (ipoib_pkey_dev_delay_open(dev))
		return 0;

	if (ipoib_ib_dev_open(dev)) {
		napi_disable(&priv->napi);
		return -EINVAL;
	}

	if (ipoib_ib_dev_up(dev)) {
		ipoib_ib_dev_stop(dev, 1);
		napi_disable(&priv->napi);
		return -EINVAL;
	}

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list) {
			int flags;

			flags = cpriv->dev->flags;
			if (flags & IFF_UP)
				continue;

			dev_change_flags(cpriv->dev, flags | IFF_UP);
		}
		mutex_unlock(&priv->vlan_mutex);
	}

	netif_start_queue(dev);

	return 0;
}

static int ipoib_stop(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "stopping interface\n");

	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
	napi_disable(&priv->napi);

	netif_stop_queue(dev);

	/*
	 * Now flush workqueue to make sure a scheduled task doesn't
	 * bring our internal state back up.
	 */
	flush_workqueue(ipoib_workqueue);

	ipoib_ib_dev_down(dev, 1);
	ipoib_ib_dev_stop(dev, 1);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list) {
			int flags;

			flags = cpriv->dev->flags;
			if (!(flags & IFF_UP))
				continue;

			dev_change_flags(cpriv->dev, flags & ~IFF_UP);
		}
		mutex_unlock(&priv->vlan_mutex);
	}

	return 0;
}

static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	/* dev->mtu > 2K ==> connected mode */
	if (ipoib_cm_admin_enabled(dev)) {
		if (new_mtu > ipoib_cm_max_mtu(dev))
			return -EINVAL;

		if (new_mtu > priv->mcast_mtu)
			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
				   priv->mcast_mtu);

		dev->mtu = new_mtu;
		return 0;
	}

	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
		return -EINVAL;

	priv->admin_mtu = new_mtu;

	dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);

	return 0;
}

static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct rb_node *n = priv->path_tree.rb_node;
	struct ipoib_path *path;
	int ret;

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		ret = memcmp(gid, path->pathrec.dgid.raw,
			     sizeof (union ib_gid));

		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return path;
	}

	return NULL;
}

static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct rb_node **n = &priv->path_tree.rb_node;
	struct rb_node *pn = NULL;
	struct ipoib_path *tpath;
	int ret;

	while (*n) {
		pn = *n;
		tpath = rb_entry(pn, struct ipoib_path, rb_node);

		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&path->rb_node, pn, n);
	rb_insert_color(&path->rb_node, &priv->path_tree);

	list_add_tail(&path->list, &priv->path_list);

	return 0;
}

static void path_free(struct net_device *dev, struct ipoib_path *path)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh, *tn;
	struct sk_buff *skb;
	unsigned long flags;

	while ((skb = __skb_dequeue(&path->queue)))
		dev_kfree_skb_irq(skb);

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
		/*
		 * It's safe to call ipoib_put_ah() inside priv->lock
		 * here, because we know that path->ah will always
		 * hold one more reference, so ipoib_put_ah() will
		 * never do more than decrement the ref count.
		 */
		if (neigh->ah)
			ipoib_put_ah(neigh->ah);

		ipoib_neigh_free(dev, neigh);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	if (path->ah)
		ipoib_put_ah(path->ah);

	kfree(path);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
{
	struct ipoib_path_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
	memset(iter->path.pathrec.dgid.raw, 0, 16);

	if (ipoib_path_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
	struct rb_node *n;
	struct ipoib_path *path;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->path_tree);

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->path = *path;
			ret = 0;
			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

void ipoib_path_iter_read(struct ipoib_path_iter *iter,
			  struct ipoib_path *path)
{
	*path = iter->path;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */

void ipoib_mark_paths_invalid(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path, *tp;

	spin_lock_irq(&priv->lock);

	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
		ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
			  be16_to_cpu(path->pathrec.dlid),
			  IPOIB_GID_ARG(path->pathrec.dgid));
		path->valid = 0;
	}

	spin_unlock_irq(&priv->lock);
}

void ipoib_flush_paths(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path, *tp;
	LIST_HEAD(remove_list);

	spin_lock_irq(&priv->tx_lock);
	spin_lock(&priv->lock);

	list_splice_init(&priv->path_list, &remove_list);

	list_for_each_entry(path, &remove_list, list)
		rb_erase(&path->rb_node, &priv->path_tree);

	list_for_each_entry_safe(path, tp, &remove_list, list) {
		if (path->query)
			ib_sa_cancel_query(path->query_id, path->query);
		spin_unlock(&priv->lock);
		spin_unlock_irq(&priv->tx_lock);
		wait_for_completion(&path->done);
		path_free(dev, path);
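		/*
		 * Both locks were dropped above so that we could sleep in
		 * wait_for_completion() and free the path; retake them
		 * before moving on to the next entry on the list.
		 */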
		spin_lock_irq(&priv->tx_lock);
		spin_lock(&priv->lock);
	}
	spin_unlock(&priv->lock);
	spin_unlock_irq(&priv->tx_lock);
}

static void path_rec_completion(int status,
				struct ib_sa_path_rec *pathrec,
				void *path_ptr)
{
	struct ipoib_path *path = path_ptr;
	struct net_device *dev = path->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_ah *ah = NULL;
	struct ipoib_ah *old_ah;
	struct ipoib_neigh *neigh, *tn;
	struct sk_buff_head skqueue;
	struct sk_buff *skb;
	unsigned long flags;

	if (!status)
		ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n",
			  be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
	else
		ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n",
			  status, IPOIB_GID_ARG(path->pathrec.dgid));

	skb_queue_head_init(&skqueue);

	if (!status) {
		struct ib_ah_attr av;

		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
			ah = ipoib_create_ah(dev, priv->pd, &av);
	}

	spin_lock_irqsave(&priv->lock, flags);

	old_ah   = path->ah;
	path->ah = ah;

	if (ah) {
		path->pathrec = *pathrec;

		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

		while ((skb = __skb_dequeue(&path->queue)))
			__skb_queue_tail(&skqueue, skb);

		list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
			if (neigh->ah) {
				WARN_ON(neigh->ah != old_ah);
				/*
				 * Dropping the ah reference inside
				 * priv->lock is safe here, because we
				 * will hold one more reference from
				 * the original value of path->ah (ie
				 * old_ah).
				 */
				ipoib_put_ah(neigh->ah);
			}
			kref_get(&path->ah->ref);
			neigh->ah = path->ah;
			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
			       sizeof(union ib_gid));

			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
				if (!ipoib_cm_get(neigh))
					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
									       path,
									       neigh));
				if (!ipoib_cm_get(neigh)) {
					list_del(&neigh->list);
					if (neigh->ah)
						ipoib_put_ah(neigh->ah);
					ipoib_neigh_free(dev, neigh);
					continue;
				}
			}

			while ((skb = __skb_dequeue(&neigh->queue)))
				__skb_queue_tail(&skqueue, skb);
		}
		path->valid = 1;
	}

	path->query = NULL;
	complete(&path->done);

	spin_unlock_irqrestore(&priv->lock, flags);

	if (old_ah)
		ipoib_put_ah(old_ah);

	while ((skb = __skb_dequeue(&skqueue))) {
		skb->dev = dev;
		if (dev_queue_xmit(skb))
			ipoib_warn(priv, "dev_queue_xmit failed "
				   "to requeue packet\n");
	}
}

static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path;

	if (!priv->broadcast)
		return NULL;

	path = kzalloc(sizeof *path, GFP_ATOMIC);
	if (!path)
		return NULL;

	path->dev = dev;

	skb_queue_head_init(&path->queue);

	INIT_LIST_HEAD(&path->neigh_list);

	memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
	path->pathrec.sgid	    = priv->local_gid;
	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
	path->pathrec.numb_path     = 1;
	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;

	return path;
}

static int path_rec_start(struct net_device *dev,
			  struct ipoib_path *path)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

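	/*
	 * path->done is completed by path_rec_completion();
	 * ipoib_flush_paths() waits on it before freeing the path.
	 */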
	ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
		  IPOIB_GID_ARG(path->pathrec.dgid));

	init_completion(&path->done);

	path->query_id =
		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
				   &path->pathrec,
				   IB_SA_PATH_REC_DGID		|
				   IB_SA_PATH_REC_SGID		|
				   IB_SA_PATH_REC_NUMB_PATH	|
				   IB_SA_PATH_REC_TRAFFIC_CLASS |
				   IB_SA_PATH_REC_PKEY,
				   1000, GFP_ATOMIC,
				   path_rec_completion,
				   path, &path->query);
	if (path->query_id < 0) {
		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
		path->query = NULL;
		return path->query_id;
	}

	return 0;
}

static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path;
	struct ipoib_neigh *neigh;

	neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
	if (!neigh) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		return;
	}

	/*
	 * We can only be called from ipoib_start_xmit, so we're
	 * inside tx_lock -- no need to save/restore flags.
	 */
	spin_lock(&priv->lock);

	path = __path_find(dev, skb->dst->neighbour->ha + 4);
	if (!path) {
		path = path_rec_create(dev, skb->dst->neighbour->ha + 4);
		if (!path)
			goto err_path;

		__path_add(dev, path);
	}

	list_add_tail(&neigh->list, &path->neigh_list);

	if (path->ah) {
		kref_get(&path->ah->ref);
		neigh->ah = path->ah;
		memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
		       sizeof(union ib_gid));

		if (ipoib_cm_enabled(dev, neigh->neighbour)) {
			if (!ipoib_cm_get(neigh))
				ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
			if (!ipoib_cm_get(neigh)) {
				list_del(&neigh->list);
				if (neigh->ah)
					ipoib_put_ah(neigh->ah);
				ipoib_neigh_free(dev, neigh);
				goto err_drop;
			}
			if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
				__skb_queue_tail(&neigh->queue, skb);
			else {
				ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
					   skb_queue_len(&neigh->queue));
				goto err_drop;
			}
		} else
			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
	} else {
		neigh->ah  = NULL;

		if (!path->query && path_rec_start(dev, path))
			goto err_list;

		__skb_queue_tail(&neigh->queue, skb);
	}

	spin_unlock(&priv->lock);
	return;

err_list:
	list_del(&neigh->list);

err_path:
	ipoib_neigh_free(dev, neigh);
err_drop:
	++dev->stats.tx_dropped;
	dev_kfree_skb_any(skb);

	spin_unlock(&priv->lock);
}

static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);

	/* Look up path record for unicasts */
	if (skb->dst->neighbour->ha[4] != 0xff) {
		neigh_add_path(skb, dev);
		return;
	}

	/* Add in the P_Key for multicasts */
	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
	skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
	ipoib_mcast_send(dev, skb->dst->neighbour->ha + 4, skb);
}

static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
			     struct ipoib_pseudoheader *phdr)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path;

	/*
	 * We can only be called from ipoib_start_xmit, so we're
	 * inside tx_lock -- no need to save/restore flags.
	 */
	spin_lock(&priv->lock);

	path = __path_find(dev, phdr->hwaddr + 4);
	if (!path || !path->valid) {
		if (!path)
			path = path_rec_create(dev, phdr->hwaddr + 4);
		if (path) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof *phdr);
			__skb_queue_tail(&path->queue, skb);

			if (path_rec_start(dev, path)) {
				spin_unlock(&priv->lock);
				path_free(dev, path);
				return;
			} else
				__path_add(dev, path);
		} else {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}

		spin_unlock(&priv->lock);
		return;
	}

	if (path->ah) {
		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
			  be16_to_cpu(path->pathrec.dlid));

		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
	} else if ((path->query || !path_rec_start(dev, path)) &&
		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
		/* put pseudoheader back on for next time */
		skb_push(skb, sizeof *phdr);
		__skb_queue_tail(&path->queue, skb);
	} else {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
	}

	spin_unlock(&priv->lock);
}

static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh;
	unsigned long flags;

	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
		return NETDEV_TX_LOCKED;

	if (likely(skb->dst && skb->dst->neighbour)) {
		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
			ipoib_path_lookup(skb, dev);
			goto out;
		}

		neigh = *to_ipoib_neigh(skb->dst->neighbour);

		if (neigh->ah)
			if (unlikely((memcmp(&neigh->dgid.raw,
					     skb->dst->neighbour->ha + 4,
					     sizeof(union ib_gid))) ||
				     (neigh->dev != dev))) {
				spin_lock(&priv->lock);
				/*
				 * It's safe to call ipoib_put_ah() inside
				 * priv->lock here, because we know that
				 * path->ah will always hold one more reference,
				 * so ipoib_put_ah() will never do more than
				 * decrement the ref count.
				 */
				ipoib_put_ah(neigh->ah);
				list_del(&neigh->list);
				ipoib_neigh_free(dev, neigh);
				spin_unlock(&priv->lock);
				ipoib_path_lookup(skb, dev);
				goto out;
			}

		if (ipoib_cm_get(neigh)) {
			if (ipoib_cm_up(neigh)) {
				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
				goto out;
			}
		} else if (neigh->ah) {
			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
			goto out;
		}

		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
			spin_lock(&priv->lock);
			__skb_queue_tail(&neigh->queue, skb);
			spin_unlock(&priv->lock);
		} else {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}
	} else {
		struct ipoib_pseudoheader *phdr =
			(struct ipoib_pseudoheader *) skb->data;
		skb_pull(skb, sizeof *phdr);

		if (phdr->hwaddr[4] == 0xff) {
			/* Add in the P_Key for multicast */
			phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
			phdr->hwaddr[9] = priv->pkey & 0xff;

			ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
		} else {
			/* unicast GID -- should be ARP or RARP reply */

			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
					   IPOIB_GID_FMT "\n",
					   skb->dst ? "neigh" : "dst",
					   be16_to_cpup((__be16 *) skb->data),
					   IPOIB_QPN(phdr->hwaddr),
					   IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
				dev_kfree_skb_any(skb);
				++dev->stats.tx_dropped;
				goto out;
			}

			unicast_arp_send(skb, dev, phdr);
		}
	}

out:
	spin_unlock_irqrestore(&priv->tx_lock, flags);

	return NETDEV_TX_OK;
}

static void ipoib_timeout(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
		   jiffies_to_msecs(jiffies - dev->trans_start));
	ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
		   netif_queue_stopped(dev),
		   priv->tx_head, priv->tx_tail);
	/* XXX reset QP, etc. */
}

static int ipoib_hard_header(struct sk_buff *skb,
			     struct net_device *dev,
			     unsigned short type,
			     const void *daddr, const void *saddr, unsigned len)
{
	struct ipoib_header *header;

	header = (struct ipoib_header *) skb_push(skb, sizeof *header);

	header->proto = htons(type);
	header->reserved = 0;

	/*
	 * If we don't have a neighbour structure, stuff the
	 * destination address onto the front of the skb so we can
	 * figure out where to send the packet later.
	 */
	if ((!skb->dst || !skb->dst->neighbour) && daddr) {
		struct ipoib_pseudoheader *phdr =
			(struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
		memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
	}

	return 0;
}

static void ipoib_set_mcast_list(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
		return;
	}

	queue_work(ipoib_workqueue, &priv->restart_task);
}

static void ipoib_neigh_cleanup(struct neighbour *n)
{
	struct ipoib_neigh *neigh;
	struct ipoib_dev_priv *priv = netdev_priv(n->dev);
	unsigned long flags;
	struct ipoib_ah *ah = NULL;

	neigh = *to_ipoib_neigh(n);
	if (neigh)
		priv = netdev_priv(neigh->dev);
	else
		return;
	ipoib_dbg(priv,
		  "neigh_cleanup for %06x " IPOIB_GID_FMT "\n",
		  IPOIB_QPN(n->ha),
		  IPOIB_GID_RAW_ARG(n->ha + 4));

	spin_lock_irqsave(&priv->lock, flags);

	if (neigh->ah)
		ah = neigh->ah;
	list_del(&neigh->list);
	ipoib_neigh_free(n->dev, neigh);

	spin_unlock_irqrestore(&priv->lock, flags);

	if (ah)
		ipoib_put_ah(ah);
}

struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
				      struct net_device *dev)
{
	struct ipoib_neigh *neigh;

	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
	if (!neigh)
		return NULL;

	neigh->neighbour = neighbour;
	neigh->dev = dev;
	*to_ipoib_neigh(neighbour) = neigh;
	skb_queue_head_init(&neigh->queue);
	ipoib_cm_set(neigh, NULL);

	return neigh;
}

void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
{
	struct sk_buff *skb;
	*to_ipoib_neigh(neigh->neighbour) = NULL;
	while ((skb = __skb_dequeue(&neigh->queue))) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
	}
	if (ipoib_cm_get(neigh))
		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
	kfree(neigh);
}

static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
{
	parms->neigh_cleanup = ipoib_neigh_cleanup;

	return 0;
}

int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	/* Allocate RX/TX "rings" to hold queued skbs */
	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
				GFP_KERNEL);
	if (!priv->rx_ring) {
		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
		       ca->name, ipoib_recvq_size);
		goto out;
	}

	priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
	if (!priv->tx_ring) {
		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
		       ca->name, ipoib_sendq_size);
		goto out_rx_ring_cleanup;
	}
	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);

	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */

	if (ipoib_ib_dev_init(dev, ca, port))
		goto out_tx_ring_cleanup;

	return 0;

out_tx_ring_cleanup:
	vfree(priv->tx_ring);

out_rx_ring_cleanup:
	kfree(priv->rx_ring);

out:
	return -ENOMEM;
}

void ipoib_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;

	ipoib_delete_debug_files(dev);

	/* Delete any child interfaces first */
	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
		unregister_netdev(cpriv->dev);
		ipoib_dev_cleanup(cpriv->dev);
		free_netdev(cpriv->dev);
	}

	ipoib_ib_dev_cleanup(dev);

	kfree(priv->rx_ring);
	vfree(priv->tx_ring);

	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}

static const struct header_ops ipoib_header_ops = {
	.create	= ipoib_hard_header,
};

static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
		       void **tcph, u64 *hdr_flags, void *priv)
{
	unsigned int ip_len;
	struct iphdr *iph;

	if (unlikely(skb->protocol != htons(ETH_P_IP)))
		return -1;

	/*
	 * In the future we may add an else clause that verifies the
	 * checksum and allows devices which do not calculate checksum
	 * to use LRO.
	 */
	if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
		return -1;

	/* Check for non-TCP packet */
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	if (iph->protocol != IPPROTO_TCP)
		return -1;

	ip_len = ip_hdrlen(skb);
	skb_set_transport_header(skb, ip_len);
	*tcph = tcp_hdr(skb);

	/* check if IP header and TCP header are complete */
	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
		return -1;

	*hdr_flags = LRO_IPV4 | LRO_TCP;
	*iphdr = iph;

	return 0;
}

static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
{
	priv->lro.lro_mgr.max_aggr	 = lro_max_aggr;
	priv->lro.lro_mgr.max_desc	 = IPOIB_MAX_LRO_DESCRIPTORS;
	priv->lro.lro_mgr.lro_arr	 = priv->lro.lro_desc;
	priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
	priv->lro.lro_mgr.features	 = LRO_F_NAPI;
	priv->lro.lro_mgr.dev		 = priv->dev;
	priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
}

static void ipoib_setup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	dev->open		 = ipoib_open;
	dev->stop		 = ipoib_stop;
	dev->change_mtu		 = ipoib_change_mtu;
	dev->hard_start_xmit	 = ipoib_start_xmit;
	dev->tx_timeout		 = ipoib_timeout;
	dev->header_ops		 = &ipoib_header_ops;
	dev->set_multicast_list	 = ipoib_set_mcast_list;
	dev->neigh_setup	 = ipoib_neigh_setup_dev;

	ipoib_set_ethtool_ops(dev);

	netif_napi_add(dev, &priv->napi, ipoib_poll, 100);

	dev->watchdog_timeo	 = HZ;

	dev->flags		|= IFF_BROADCAST | IFF_MULTICAST;

	/*
	 * We add in INFINIBAND_ALEN to allow for the destination
	 * address "pseudoheader" for skbs without neighbour struct.
	 */
	dev->hard_header_len	 = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
	dev->addr_len		 = INFINIBAND_ALEN;
	dev->type		 = ARPHRD_INFINIBAND;
	dev->tx_queue_len	 = ipoib_sendq_size * 2;
	dev->features		 = (NETIF_F_VLAN_CHALLENGED	|
				    NETIF_F_LLTX		|
				    NETIF_F_HIGHDMA);

	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);

	netif_carrier_off(dev);

	priv->dev = dev;

	ipoib_lro_setup(priv);

	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->tx_lock);

	mutex_init(&priv->vlan_mutex);

	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);

	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
	INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
	INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
	INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
	INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
}

struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
{
	struct net_device *dev;

	dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name,
			   ipoib_setup);
	if (!dev)
		return NULL;

	return netdev_priv(dev);
}

static ssize_t show_pkey(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));

	return sprintf(buf, "0x%04x\n", priv->pkey);
}
static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);

static ssize_t show_umcast(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));

	return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}

static ssize_t set_umcast(struct device *dev,
			  struct device_attribute *attr,
			  const char *buf, size_t count)
{
	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
	unsigned long umcast_val = simple_strtoul(buf, NULL, 0);

	if (umcast_val > 0) {
		set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
		ipoib_warn(priv, "ignoring multicast groups joined directly "
			   "by userspace\n");
	} else
		clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);

	return count;
}
static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);

int ipoib_add_umcast_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_umcast);
}

static ssize_t create_child(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t count)
{
	int pkey;
	int ret;

	if (sscanf(buf, "%i", &pkey) != 1)
		return -EINVAL;

	if (pkey < 0 || pkey > 0xffff)
		return -EINVAL;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	pkey |= 0x8000;

	ret = ipoib_vlan_add(to_net_dev(dev), pkey);

	return ret ? ret : count;
}
static DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);

static ssize_t delete_child(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t count)
{
	int pkey;
	int ret;

	if (sscanf(buf, "%i", &pkey) != 1)
		return -EINVAL;

	if (pkey < 0 || pkey > 0xffff)
		return -EINVAL;

	ret = ipoib_vlan_delete(to_net_dev(dev), pkey);

	return ret ? ret : count;

}
static DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);

int ipoib_add_pkey_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_pkey);
}

static struct net_device *ipoib_add_port(const char *format,
					 struct ib_device *hca, u8 port)
{
	struct ipoib_dev_priv *priv;
	struct ib_device_attr *device_attr;
	struct ib_port_attr attr;
	int result = -ENOMEM;

	priv = ipoib_intf_alloc(format);
	if (!priv)
		goto alloc_mem_failed;

	SET_NETDEV_DEV(priv->dev, hca->dma_device);

	if (!ib_query_port(hca, port, &attr))
		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
	else {
		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
		       hca->name, port);
		goto device_init_failed;
	}

	/* MTU will be reset when mcast join happens */
	priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
	priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;

	result = ib_query_pkey(hca, port, 0, &priv->pkey);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}

	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
	if (!device_attr) {
		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
		       hca->name, sizeof *device_attr);
		goto device_init_failed;
	}

	result = ib_query_device(hca, device_attr);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
		       hca->name, result);
		kfree(device_attr);
		goto device_init_failed;
	}
	priv->hca_caps = device_attr->device_cap_flags;

	kfree(device_attr);

	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
	}

	if (lro)
		priv->dev->features |= NETIF_F_LRO;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	priv->pkey |= 0x8000;

	priv->dev->broadcast[8] = priv->pkey >> 8;
	priv->dev->broadcast[9] = priv->pkey & 0xff;

	result = ib_query_gid(hca, port, 0, &priv->local_gid);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	} else
		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));

	result = ipoib_dev_init(priv->dev, hca, port);
	if (result < 0) {
		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
			      priv->ca, ipoib_event);
	result = ib_register_event_handler(&priv->event_handler);
	if (result < 0) {
		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
		       "port %d (ret = %d)\n",
		       hca->name, port, result);
		goto event_failed;
	}

	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
		priv->dev->features |= NETIF_F_TSO;

	result = register_netdev(priv->dev);
	if (result) {
		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
		       hca->name, port, result);
		goto register_failed;
	}

	ipoib_create_debug_files(priv->dev);

	if (ipoib_cm_add_mode_attr(priv->dev))
		goto sysfs_failed;
	if (ipoib_add_pkey_attr(priv->dev))
		goto sysfs_failed;
	if (ipoib_add_umcast_attr(priv->dev))
		goto sysfs_failed;
	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
		goto sysfs_failed;
	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
		goto sysfs_failed;

	return priv->dev;

sysfs_failed:
	ipoib_delete_debug_files(priv->dev);
	unregister_netdev(priv->dev);

register_failed:
	ib_unregister_event_handler(&priv->event_handler);
	flush_scheduled_work();

event_failed:
	ipoib_dev_cleanup(priv->dev);

device_init_failed:
	free_netdev(priv->dev);

alloc_mem_failed:
	return ERR_PTR(result);
}

static void ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	struct net_device *dev;
	struct ipoib_dev_priv *priv;
	int s, e, p;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
	if (!dev_list)
		return;

	INIT_LIST_HEAD(dev_list);

	if (device->node_type == RDMA_NODE_IB_SWITCH) {
		s = 0;
		e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	for (p = s; p <= e; ++p) {
		dev = ipoib_add_port("ib%d", device, p);
		if (!IS_ERR(dev)) {
			priv = netdev_priv(dev);
			list_add_tail(&priv->list, dev_list);
		}
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
}

static void ipoib_remove_one(struct ib_device *device)
{
	struct ipoib_dev_priv *priv, *tmp;
	struct list_head *dev_list;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev_list = ib_get_client_data(device, &ipoib_client);

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		ib_unregister_event_handler(&priv->event_handler);
		flush_scheduled_work();

		unregister_netdev(priv->dev);
		ipoib_dev_cleanup(priv->dev);
		free_netdev(priv->dev);
	}

	kfree(dev_list);
}

static int __init ipoib_init_module(void)
{
	int ret;

	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);

	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
						     IPOIB_MIN_QUEUE_SIZE));
#ifdef CONFIG_INFINIBAND_IPOIB_CM
	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif

	/*
	 * When copying small received packets, we only copy from the
	 * linear data part of the SKB, so we rely on this condition.
	 */
	BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);

	ret = ipoib_register_debugfs();
	if (ret)
		return ret;

	/*
	 * We create our own workqueue mainly because we want to be
	 * able to flush it when devices are being removed.  We can't
	 * use schedule_work()/flush_scheduled_work() because both
	 * unregister_netdev() and linkwatch_event take the rtnl lock,
	 * so flush_scheduled_work() can deadlock during device
	 * removal.
	 */
	ipoib_workqueue = create_singlethread_workqueue("ipoib");
	if (!ipoib_workqueue) {
		ret = -ENOMEM;
		goto err_fs;
	}

	ib_sa_register_client(&ipoib_sa_client);

	ret = ib_register_client(&ipoib_client);
	if (ret)
		goto err_sa;

	return 0;

err_sa:
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);

err_fs:
	ipoib_unregister_debugfs();

	return ret;
}

static void __exit ipoib_cleanup_module(void)
{
	ib_unregister_client(&ipoib_client);
	ib_sa_unregister_client(&ipoib_sa_client);
	ipoib_unregister_debugfs();
	destroy_workqueue(ipoib_workqueue);
}

module_init(ipoib_init_module);
module_exit(ipoib_cleanup_module);