1 /* 2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of EITHER the GNU General Public License 6 * version 2 as published by the Free Software Foundation or the BSD 7 * 2-Clause License. This program is distributed in the hope that it 8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED 9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 10 * See the GNU General Public License version 2 for more details at 11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program available in the file COPYING in the main 15 * directory of this source tree. 16 * 17 * The BSD 2-Clause License 18 * 19 * Redistribution and use in source and binary forms, with or 20 * without modification, are permitted provided that the following 21 * conditions are met: 22 * 23 * - Redistributions of source code must retain the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer. 26 * 27 * - Redistributions in binary form must reproduce the above 28 * copyright notice, this list of conditions and the following 29 * disclaimer in the documentation and/or other materials 30 * provided with the distribution. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 43 * OF THE POSSIBILITY OF SUCH DAMAGE. 44 */ 45 46 #include <linux/errno.h> 47 #include <linux/inetdevice.h> 48 #include <linux/init.h> 49 #include <linux/module.h> 50 #include <linux/slab.h> 51 #include <rdma/ib_addr.h> 52 #include <rdma/ib_smi.h> 53 #include <rdma/ib_user_verbs.h> 54 #include <net/addrconf.h> 55 56 #include "pvrdma.h" 57 58 #define DRV_NAME "vmw_pvrdma" 59 #define DRV_VERSION "1.0.1.0-k" 60 61 static DEFINE_MUTEX(pvrdma_device_list_lock); 62 static LIST_HEAD(pvrdma_device_list); 63 static struct workqueue_struct *event_wq; 64 65 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); 66 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 67 68 static ssize_t hca_type_show(struct device *device, 69 struct device_attribute *attr, char *buf) 70 { 71 return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 72 } 73 static DEVICE_ATTR_RO(hca_type); 74 75 static ssize_t hw_rev_show(struct device *device, 76 struct device_attribute *attr, char *buf) 77 { 78 return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); 79 } 80 static DEVICE_ATTR_RO(hw_rev); 81 82 static ssize_t board_id_show(struct device *device, 83 struct device_attribute *attr, char *buf) 84 { 85 return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); 86 } 87 static DEVICE_ATTR_RO(board_id); 88 89 static struct attribute *pvrdma_class_attributes[] = { 90 &dev_attr_hw_rev.attr, 91 &dev_attr_hca_type.attr, 92 &dev_attr_board_id.attr, 93 NULL, 94 }; 95 96 static const struct attribute_group pvrdma_attr_group = { 97 .attrs = pvrdma_class_attributes, 98 }; 99 100 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) 101 { 102 struct pvrdma_dev *dev = 103 container_of(device, struct pvrdma_dev, ib_dev); 104 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n", 105 (int) (dev->dsr->caps.fw_ver >> 32), 106 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, 107 (int) dev->dsr->caps.fw_ver & 0xffff); 108 } 109 110 static int pvrdma_init_device(struct pvrdma_dev *dev) 111 { 112 /* Initialize some device related stuff */ 113 spin_lock_init(&dev->cmd_lock); 114 sema_init(&dev->cmd_sema, 1); 115 atomic_set(&dev->num_qps, 0); 116 atomic_set(&dev->num_srqs, 0); 117 atomic_set(&dev->num_cqs, 0); 118 atomic_set(&dev->num_pds, 0); 119 atomic_set(&dev->num_ahs, 0); 120 121 return 0; 122 } 123 124 static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num, 125 struct ib_port_immutable *immutable) 126 { 127 struct pvrdma_dev *dev = to_vdev(ibdev); 128 struct ib_port_attr attr; 129 int err; 130 131 if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) 132 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE; 133 else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2) 134 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 135 136 err = ib_query_port(ibdev, port_num, &attr); 137 if (err) 138 return err; 139 140 immutable->pkey_tbl_len = attr.pkey_tbl_len; 141 immutable->gid_tbl_len = attr.gid_tbl_len; 142 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 143 return 0; 144 } 145 146 static const struct ib_device_ops pvrdma_dev_ops = { 147 .owner = THIS_MODULE, 148 .driver_id = RDMA_DRIVER_VMW_PVRDMA, 149 .uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION, 150 151 .add_gid = pvrdma_add_gid, 152 .alloc_mr = pvrdma_alloc_mr, 153 .alloc_pd = pvrdma_alloc_pd, 154 .alloc_ucontext = pvrdma_alloc_ucontext, 155 .create_ah = pvrdma_create_ah, 156 .create_cq = pvrdma_create_cq, 157 .create_qp = pvrdma_create_qp, 158 .dealloc_pd = pvrdma_dealloc_pd, 159 .dealloc_ucontext = pvrdma_dealloc_ucontext, 160 .del_gid = pvrdma_del_gid, 161 .dereg_mr = pvrdma_dereg_mr, 162 .destroy_ah = pvrdma_destroy_ah, 163 .destroy_cq = pvrdma_destroy_cq, 164 .destroy_qp = pvrdma_destroy_qp, 165 .device_group = &pvrdma_attr_group, 166 .get_dev_fw_str = pvrdma_get_fw_ver_str, 167 .get_dma_mr = pvrdma_get_dma_mr, 168 .get_link_layer = pvrdma_port_link_layer, 169 .get_port_immutable = pvrdma_port_immutable, 170 .map_mr_sg = pvrdma_map_mr_sg, 171 .mmap = pvrdma_mmap, 172 .modify_port = pvrdma_modify_port, 173 .modify_qp = pvrdma_modify_qp, 174 .poll_cq = pvrdma_poll_cq, 175 .post_recv = pvrdma_post_recv, 176 .post_send = pvrdma_post_send, 177 .query_device = pvrdma_query_device, 178 .query_gid = pvrdma_query_gid, 179 .query_pkey = pvrdma_query_pkey, 180 .query_port = pvrdma_query_port, 181 .query_qp = pvrdma_query_qp, 182 .reg_user_mr = pvrdma_reg_user_mr, 183 .req_notify_cq = pvrdma_req_notify_cq, 184 185 INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), 186 INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq), 187 INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), 188 INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), 189 }; 190 191 static const struct ib_device_ops pvrdma_dev_srq_ops = { 192 .create_srq = pvrdma_create_srq, 193 .destroy_srq = pvrdma_destroy_srq, 194 .modify_srq = pvrdma_modify_srq, 195 .query_srq = pvrdma_query_srq, 196 197 INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq), 198 }; 199 200 static int pvrdma_register_device(struct pvrdma_dev *dev) 201 { 202 int ret = -1; 203 204 dev->ib_dev.node_guid = dev->dsr->caps.node_guid; 205 dev->sys_image_guid = dev->dsr->caps.sys_image_guid; 206 dev->flags = 0; 207 dev->ib_dev.num_comp_vectors = 1; 208 dev->ib_dev.dev.parent = &dev->pdev->dev; 209 210 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 211 dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; 212 213 ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); 214 215 mutex_init(&dev->port_mutex); 216 spin_lock_init(&dev->desc_lock); 217 218 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *), 219 GFP_KERNEL); 220 if (!dev->cq_tbl) 221 return ret; 222 spin_lock_init(&dev->cq_tbl_lock); 223 224 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *), 225 GFP_KERNEL); 226 if (!dev->qp_tbl) 227 goto err_cq_free; 228 spin_lock_init(&dev->qp_tbl_lock); 229 230 /* Check if SRQ is supported by backend */ 231 if (dev->dsr->caps.max_srq) { 232 ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); 233 234 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, 235 sizeof(struct pvrdma_srq *), 236 GFP_KERNEL); 237 if (!dev->srq_tbl) 238 goto err_qp_free; 239 } 240 ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); 241 if (ret) 242 goto err_srq_free; 243 spin_lock_init(&dev->srq_tbl_lock); 244 245 ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev); 246 if (ret) 247 goto err_srq_free; 248 249 dev->ib_active = true; 250 251 return 0; 252 253 err_srq_free: 254 kfree(dev->srq_tbl); 255 err_qp_free: 256 kfree(dev->qp_tbl); 257 err_cq_free: 258 kfree(dev->cq_tbl); 259 260 return ret; 261 } 262 263 static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) 264 { 265 u32 icr = PVRDMA_INTR_CAUSE_RESPONSE; 266 struct pvrdma_dev *dev = dev_id; 267 268 dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); 269 270 if (!dev->pdev->msix_enabled) { 271 /* Legacy intr */ 272 icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); 273 if (icr == 0) 274 return IRQ_NONE; 275 } 276 277 if (icr == PVRDMA_INTR_CAUSE_RESPONSE) 278 complete(&dev->cmd_done); 279 280 return IRQ_HANDLED; 281 } 282 283 static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) 284 { 285 struct pvrdma_qp *qp; 286 unsigned long flags; 287 288 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 289 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; 290 if (qp) 291 refcount_inc(&qp->refcnt); 292 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 293 294 if (qp && qp->ibqp.event_handler) { 295 struct ib_qp *ibqp = &qp->ibqp; 296 struct ib_event e; 297 298 e.device = ibqp->device; 299 e.element.qp = ibqp; 300 e.event = type; /* 1:1 mapping for now. */ 301 ibqp->event_handler(&e, ibqp->qp_context); 302 } 303 if (qp) { 304 if (refcount_dec_and_test(&qp->refcnt)) 305 complete(&qp->free); 306 } 307 } 308 309 static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) 310 { 311 struct pvrdma_cq *cq; 312 unsigned long flags; 313 314 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 315 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; 316 if (cq) 317 refcount_inc(&cq->refcnt); 318 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 319 320 if (cq && cq->ibcq.event_handler) { 321 struct ib_cq *ibcq = &cq->ibcq; 322 struct ib_event e; 323 324 e.device = ibcq->device; 325 e.element.cq = ibcq; 326 e.event = type; /* 1:1 mapping for now. */ 327 ibcq->event_handler(&e, ibcq->cq_context); 328 } 329 if (cq) { 330 if (refcount_dec_and_test(&cq->refcnt)) 331 complete(&cq->free); 332 } 333 } 334 335 static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) 336 { 337 struct pvrdma_srq *srq; 338 unsigned long flags; 339 340 spin_lock_irqsave(&dev->srq_tbl_lock, flags); 341 if (dev->srq_tbl) 342 srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; 343 else 344 srq = NULL; 345 if (srq) 346 refcount_inc(&srq->refcnt); 347 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); 348 349 if (srq && srq->ibsrq.event_handler) { 350 struct ib_srq *ibsrq = &srq->ibsrq; 351 struct ib_event e; 352 353 e.device = ibsrq->device; 354 e.element.srq = ibsrq; 355 e.event = type; /* 1:1 mapping for now. */ 356 ibsrq->event_handler(&e, ibsrq->srq_context); 357 } 358 if (srq) { 359 if (refcount_dec_and_test(&srq->refcnt)) 360 complete(&srq->free); 361 } 362 } 363 364 static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, 365 enum ib_event_type event) 366 { 367 struct ib_event ib_event; 368 369 memset(&ib_event, 0, sizeof(ib_event)); 370 ib_event.device = &dev->ib_dev; 371 ib_event.element.port_num = port; 372 ib_event.event = event; 373 ib_dispatch_event(&ib_event); 374 } 375 376 static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) 377 { 378 if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { 379 dev_warn(&dev->pdev->dev, "event on port %d\n", port); 380 return; 381 } 382 383 pvrdma_dispatch_event(dev, port, type); 384 } 385 386 static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i) 387 { 388 return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr( 389 &dev->async_pdir, 390 PAGE_SIZE + 391 sizeof(struct pvrdma_eqe) * i); 392 } 393 394 static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) 395 { 396 struct pvrdma_dev *dev = dev_id; 397 struct pvrdma_ring *ring = &dev->async_ring_state->rx; 398 int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) * 399 PAGE_SIZE / sizeof(struct pvrdma_eqe); 400 unsigned int head; 401 402 dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n"); 403 404 /* 405 * Don't process events until the IB device is registered. Otherwise 406 * we'll try to ib_dispatch_event() on an invalid device. 407 */ 408 if (!dev->ib_active) 409 return IRQ_HANDLED; 410 411 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 412 struct pvrdma_eqe *eqe; 413 414 eqe = get_eqe(dev, head); 415 416 switch (eqe->type) { 417 case PVRDMA_EVENT_QP_FATAL: 418 case PVRDMA_EVENT_QP_REQ_ERR: 419 case PVRDMA_EVENT_QP_ACCESS_ERR: 420 case PVRDMA_EVENT_COMM_EST: 421 case PVRDMA_EVENT_SQ_DRAINED: 422 case PVRDMA_EVENT_PATH_MIG: 423 case PVRDMA_EVENT_PATH_MIG_ERR: 424 case PVRDMA_EVENT_QP_LAST_WQE_REACHED: 425 pvrdma_qp_event(dev, eqe->info, eqe->type); 426 break; 427 428 case PVRDMA_EVENT_CQ_ERR: 429 pvrdma_cq_event(dev, eqe->info, eqe->type); 430 break; 431 432 case PVRDMA_EVENT_SRQ_ERR: 433 case PVRDMA_EVENT_SRQ_LIMIT_REACHED: 434 pvrdma_srq_event(dev, eqe->info, eqe->type); 435 break; 436 437 case PVRDMA_EVENT_PORT_ACTIVE: 438 case PVRDMA_EVENT_PORT_ERR: 439 case PVRDMA_EVENT_LID_CHANGE: 440 case PVRDMA_EVENT_PKEY_CHANGE: 441 case PVRDMA_EVENT_SM_CHANGE: 442 case PVRDMA_EVENT_CLIENT_REREGISTER: 443 case PVRDMA_EVENT_GID_CHANGE: 444 pvrdma_dev_event(dev, eqe->info, eqe->type); 445 break; 446 447 case PVRDMA_EVENT_DEVICE_FATAL: 448 pvrdma_dev_event(dev, 1, eqe->type); 449 break; 450 451 default: 452 break; 453 } 454 455 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 456 } 457 458 return IRQ_HANDLED; 459 } 460 461 static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev, 462 unsigned int i) 463 { 464 return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr( 465 &dev->cq_pdir, 466 PAGE_SIZE + 467 sizeof(struct pvrdma_cqne) * i); 468 } 469 470 static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) 471 { 472 struct pvrdma_dev *dev = dev_id; 473 struct pvrdma_ring *ring = &dev->cq_ring_state->rx; 474 int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / 475 sizeof(struct pvrdma_cqne); 476 unsigned int head; 477 478 dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); 479 480 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 481 struct pvrdma_cqne *cqne; 482 struct pvrdma_cq *cq; 483 484 cqne = get_cqne(dev, head); 485 spin_lock(&dev->cq_tbl_lock); 486 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; 487 if (cq) 488 refcount_inc(&cq->refcnt); 489 spin_unlock(&dev->cq_tbl_lock); 490 491 if (cq && cq->ibcq.comp_handler) 492 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 493 if (cq) { 494 if (refcount_dec_and_test(&cq->refcnt)) 495 complete(&cq->free); 496 } 497 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 498 } 499 500 return IRQ_HANDLED; 501 } 502 503 static void pvrdma_free_irq(struct pvrdma_dev *dev) 504 { 505 int i; 506 507 dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); 508 for (i = 0; i < dev->nr_vectors; i++) 509 free_irq(pci_irq_vector(dev->pdev, i), dev); 510 } 511 512 static void pvrdma_enable_intrs(struct pvrdma_dev *dev) 513 { 514 dev_dbg(&dev->pdev->dev, "enable interrupts\n"); 515 pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0); 516 } 517 518 static void pvrdma_disable_intrs(struct pvrdma_dev *dev) 519 { 520 dev_dbg(&dev->pdev->dev, "disable interrupts\n"); 521 pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); 522 } 523 524 static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) 525 { 526 struct pci_dev *pdev = dev->pdev; 527 int ret = 0, i; 528 529 ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS, 530 PCI_IRQ_MSIX); 531 if (ret < 0) { 532 ret = pci_alloc_irq_vectors(pdev, 1, 1, 533 PCI_IRQ_MSI | PCI_IRQ_LEGACY); 534 if (ret < 0) 535 return ret; 536 } 537 dev->nr_vectors = ret; 538 539 ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler, 540 pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev); 541 if (ret) { 542 dev_err(&dev->pdev->dev, 543 "failed to request interrupt 0\n"); 544 goto out_free_vectors; 545 } 546 547 for (i = 1; i < dev->nr_vectors; i++) { 548 ret = request_irq(pci_irq_vector(dev->pdev, i), 549 i == 1 ? pvrdma_intr1_handler : 550 pvrdma_intrx_handler, 551 0, DRV_NAME, dev); 552 if (ret) { 553 dev_err(&dev->pdev->dev, 554 "failed to request interrupt %d\n", i); 555 goto free_irqs; 556 } 557 } 558 559 return 0; 560 561 free_irqs: 562 while (--i >= 0) 563 free_irq(pci_irq_vector(dev->pdev, i), dev); 564 out_free_vectors: 565 pci_free_irq_vectors(pdev); 566 return ret; 567 } 568 569 static void pvrdma_free_slots(struct pvrdma_dev *dev) 570 { 571 struct pci_dev *pdev = dev->pdev; 572 573 if (dev->resp_slot) 574 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot, 575 dev->dsr->resp_slot_dma); 576 if (dev->cmd_slot) 577 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot, 578 dev->dsr->cmd_slot_dma); 579 } 580 581 static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, 582 const union ib_gid *gid, 583 u8 gid_type, 584 int index) 585 { 586 int ret; 587 union pvrdma_cmd_req req; 588 struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind; 589 590 if (!dev->sgid_tbl) { 591 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 592 return -EINVAL; 593 } 594 595 memset(cmd_bind, 0, sizeof(*cmd_bind)); 596 cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND; 597 memcpy(cmd_bind->new_gid, gid->raw, 16); 598 cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); 599 cmd_bind->vlan = 0xfff; 600 cmd_bind->index = index; 601 cmd_bind->gid_type = gid_type; 602 603 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 604 if (ret < 0) { 605 dev_warn(&dev->pdev->dev, 606 "could not create binding, error: %d\n", ret); 607 return -EFAULT; 608 } 609 memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid)); 610 return 0; 611 } 612 613 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) 614 { 615 struct pvrdma_dev *dev = to_vdev(attr->device); 616 617 return pvrdma_add_gid_at_index(dev, &attr->gid, 618 ib_gid_type_to_pvrdma(attr->gid_type), 619 attr->index); 620 } 621 622 static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) 623 { 624 int ret; 625 union pvrdma_cmd_req req; 626 struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind; 627 628 /* Update sgid table. */ 629 if (!dev->sgid_tbl) { 630 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 631 return -EINVAL; 632 } 633 634 memset(cmd_dest, 0, sizeof(*cmd_dest)); 635 cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND; 636 memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16); 637 cmd_dest->index = index; 638 639 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 640 if (ret < 0) { 641 dev_warn(&dev->pdev->dev, 642 "could not destroy binding, error: %d\n", ret); 643 return ret; 644 } 645 memset(&dev->sgid_tbl[index], 0, 16); 646 return 0; 647 } 648 649 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) 650 { 651 struct pvrdma_dev *dev = to_vdev(attr->device); 652 653 dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", 654 attr->index, dev->netdev->name); 655 656 return pvrdma_del_gid_at_index(dev, attr->index); 657 } 658 659 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, 660 struct net_device *ndev, 661 unsigned long event) 662 { 663 struct pci_dev *pdev_net; 664 unsigned int slot; 665 666 switch (event) { 667 case NETDEV_REBOOT: 668 case NETDEV_DOWN: 669 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); 670 break; 671 case NETDEV_UP: 672 pvrdma_write_reg(dev, PVRDMA_REG_CTL, 673 PVRDMA_DEVICE_CTL_UNQUIESCE); 674 675 mb(); 676 677 if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) 678 dev_err(&dev->pdev->dev, 679 "failed to activate device during link up\n"); 680 else 681 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); 682 break; 683 case NETDEV_UNREGISTER: 684 ib_device_set_netdev(&dev->ib_dev, NULL, 1); 685 dev_put(dev->netdev); 686 dev->netdev = NULL; 687 break; 688 case NETDEV_REGISTER: 689 /* vmxnet3 will have same bus, slot. But func will be 0 */ 690 slot = PCI_SLOT(dev->pdev->devfn); 691 pdev_net = pci_get_slot(dev->pdev->bus, 692 PCI_DEVFN(slot, 0)); 693 if ((dev->netdev == NULL) && 694 (pci_get_drvdata(pdev_net) == ndev)) { 695 /* this is our netdev */ 696 ib_device_set_netdev(&dev->ib_dev, ndev, 1); 697 dev->netdev = ndev; 698 dev_hold(ndev); 699 } 700 pci_dev_put(pdev_net); 701 break; 702 703 default: 704 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 705 event, dev_name(&dev->ib_dev.dev)); 706 break; 707 } 708 } 709 710 static void pvrdma_netdevice_event_work(struct work_struct *work) 711 { 712 struct pvrdma_netdevice_work *netdev_work; 713 struct pvrdma_dev *dev; 714 715 netdev_work = container_of(work, struct pvrdma_netdevice_work, work); 716 717 mutex_lock(&pvrdma_device_list_lock); 718 list_for_each_entry(dev, &pvrdma_device_list, device_link) { 719 if ((netdev_work->event == NETDEV_REGISTER) || 720 (dev->netdev == netdev_work->event_netdev)) { 721 pvrdma_netdevice_event_handle(dev, 722 netdev_work->event_netdev, 723 netdev_work->event); 724 break; 725 } 726 } 727 mutex_unlock(&pvrdma_device_list_lock); 728 729 kfree(netdev_work); 730 } 731 732 static int pvrdma_netdevice_event(struct notifier_block *this, 733 unsigned long event, void *ptr) 734 { 735 struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr); 736 struct pvrdma_netdevice_work *netdev_work; 737 738 netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC); 739 if (!netdev_work) 740 return NOTIFY_BAD; 741 742 INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work); 743 netdev_work->event_netdev = event_netdev; 744 netdev_work->event = event; 745 queue_work(event_wq, &netdev_work->work); 746 747 return NOTIFY_DONE; 748 } 749 750 static int pvrdma_pci_probe(struct pci_dev *pdev, 751 const struct pci_device_id *id) 752 { 753 struct pci_dev *pdev_net; 754 struct pvrdma_dev *dev; 755 int ret; 756 unsigned long start; 757 unsigned long len; 758 dma_addr_t slot_dma = 0; 759 760 dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); 761 762 /* Allocate zero-out device */ 763 dev = ib_alloc_device(pvrdma_dev, ib_dev); 764 if (!dev) { 765 dev_err(&pdev->dev, "failed to allocate IB device\n"); 766 return -ENOMEM; 767 } 768 769 mutex_lock(&pvrdma_device_list_lock); 770 list_add(&dev->device_link, &pvrdma_device_list); 771 mutex_unlock(&pvrdma_device_list_lock); 772 773 ret = pvrdma_init_device(dev); 774 if (ret) 775 goto err_free_device; 776 777 dev->pdev = pdev; 778 pci_set_drvdata(pdev, dev); 779 780 ret = pci_enable_device(pdev); 781 if (ret) { 782 dev_err(&pdev->dev, "cannot enable PCI device\n"); 783 goto err_free_device; 784 } 785 786 dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n", 787 pci_resource_flags(pdev, 0)); 788 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 789 (unsigned long long)pci_resource_len(pdev, 0)); 790 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 791 (unsigned long long)pci_resource_start(pdev, 0)); 792 dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n", 793 pci_resource_flags(pdev, 1)); 794 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 795 (unsigned long long)pci_resource_len(pdev, 1)); 796 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 797 (unsigned long long)pci_resource_start(pdev, 1)); 798 799 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || 800 !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { 801 dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); 802 ret = -ENOMEM; 803 goto err_disable_pdev; 804 } 805 806 ret = pci_request_regions(pdev, DRV_NAME); 807 if (ret) { 808 dev_err(&pdev->dev, "cannot request PCI resources\n"); 809 goto err_disable_pdev; 810 } 811 812 /* Enable 64-Bit DMA */ 813 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { 814 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 815 if (ret != 0) { 816 dev_err(&pdev->dev, 817 "pci_set_consistent_dma_mask failed\n"); 818 goto err_free_resource; 819 } 820 } else { 821 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 822 if (ret != 0) { 823 dev_err(&pdev->dev, 824 "pci_set_dma_mask failed\n"); 825 goto err_free_resource; 826 } 827 } 828 dma_set_max_seg_size(&pdev->dev, UINT_MAX); 829 pci_set_master(pdev); 830 831 /* Map register space */ 832 start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 833 len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 834 dev->regs = ioremap(start, len); 835 if (!dev->regs) { 836 dev_err(&pdev->dev, "register mapping failed\n"); 837 ret = -ENOMEM; 838 goto err_free_resource; 839 } 840 841 /* Setup per-device UAR. */ 842 dev->driver_uar.index = 0; 843 dev->driver_uar.pfn = 844 pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> 845 PAGE_SHIFT; 846 dev->driver_uar.map = 847 ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 848 if (!dev->driver_uar.map) { 849 dev_err(&pdev->dev, "failed to remap UAR pages\n"); 850 ret = -ENOMEM; 851 goto err_unmap_regs; 852 } 853 854 dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); 855 dev_info(&pdev->dev, "device version %d, driver version %d\n", 856 dev->dsr_version, PVRDMA_VERSION); 857 858 dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), 859 &dev->dsrbase, GFP_KERNEL); 860 if (!dev->dsr) { 861 dev_err(&pdev->dev, "failed to allocate shared region\n"); 862 ret = -ENOMEM; 863 goto err_uar_unmap; 864 } 865 866 /* Setup the shared region */ 867 dev->dsr->driver_version = PVRDMA_VERSION; 868 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? 869 PVRDMA_GOS_BITS_32 : 870 PVRDMA_GOS_BITS_64; 871 dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; 872 dev->dsr->gos_info.gos_ver = 1; 873 874 if (dev->dsr_version < PVRDMA_PPN64_VERSION) 875 dev->dsr->uar_pfn = dev->driver_uar.pfn; 876 else 877 dev->dsr->uar_pfn64 = dev->driver_uar.pfn; 878 879 /* Command slot. */ 880 dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 881 &slot_dma, GFP_KERNEL); 882 if (!dev->cmd_slot) { 883 ret = -ENOMEM; 884 goto err_free_dsr; 885 } 886 887 dev->dsr->cmd_slot_dma = (u64)slot_dma; 888 889 /* Response slot. */ 890 dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 891 &slot_dma, GFP_KERNEL); 892 if (!dev->resp_slot) { 893 ret = -ENOMEM; 894 goto err_free_slots; 895 } 896 897 dev->dsr->resp_slot_dma = (u64)slot_dma; 898 899 /* Async event ring */ 900 dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 901 ret = pvrdma_page_dir_init(dev, &dev->async_pdir, 902 dev->dsr->async_ring_pages.num_pages, true); 903 if (ret) 904 goto err_free_slots; 905 dev->async_ring_state = dev->async_pdir.pages[0]; 906 dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; 907 908 /* CQ notification ring */ 909 dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 910 ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, 911 dev->dsr->cq_ring_pages.num_pages, true); 912 if (ret) 913 goto err_free_async_ring; 914 dev->cq_ring_state = dev->cq_pdir.pages[0]; 915 dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma; 916 917 /* 918 * Write the PA of the shared region to the device. The writes must be 919 * ordered such that the high bits are written last. When the writes 920 * complete, the device will have filled out the capabilities. 921 */ 922 923 pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase); 924 pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH, 925 (u32)((u64)(dev->dsrbase) >> 32)); 926 927 /* Make sure the write is complete before reading status. */ 928 mb(); 929 930 /* The driver supports RoCE V1 and V2. */ 931 if (!PVRDMA_SUPPORTED(dev)) { 932 dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n"); 933 ret = -EFAULT; 934 goto err_free_cq_ring; 935 } 936 937 /* Paired vmxnet3 will have same bus, slot. But func will be 0 */ 938 pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); 939 if (!pdev_net) { 940 dev_err(&pdev->dev, "failed to find paired net device\n"); 941 ret = -ENODEV; 942 goto err_free_cq_ring; 943 } 944 945 if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE || 946 pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) { 947 dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n"); 948 pci_dev_put(pdev_net); 949 ret = -ENODEV; 950 goto err_free_cq_ring; 951 } 952 953 dev->netdev = pci_get_drvdata(pdev_net); 954 pci_dev_put(pdev_net); 955 if (!dev->netdev) { 956 dev_err(&pdev->dev, "failed to get vmxnet3 device\n"); 957 ret = -ENODEV; 958 goto err_free_cq_ring; 959 } 960 dev_hold(dev->netdev); 961 962 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); 963 964 /* Interrupt setup */ 965 ret = pvrdma_alloc_intrs(dev); 966 if (ret) { 967 dev_err(&pdev->dev, "failed to allocate interrupts\n"); 968 ret = -ENOMEM; 969 goto err_free_cq_ring; 970 } 971 972 /* Allocate UAR table. */ 973 ret = pvrdma_uar_table_init(dev); 974 if (ret) { 975 dev_err(&pdev->dev, "failed to allocate UAR table\n"); 976 ret = -ENOMEM; 977 goto err_free_intrs; 978 } 979 980 /* Allocate GID table */ 981 dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len, 982 sizeof(union ib_gid), GFP_KERNEL); 983 if (!dev->sgid_tbl) { 984 ret = -ENOMEM; 985 goto err_free_uar_table; 986 } 987 dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len); 988 989 pvrdma_enable_intrs(dev); 990 991 /* Activate pvrdma device */ 992 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE); 993 994 /* Make sure the write is complete before reading status. */ 995 mb(); 996 997 /* Check if device was successfully activated */ 998 ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR); 999 if (ret != 0) { 1000 dev_err(&pdev->dev, "failed to activate device\n"); 1001 ret = -EFAULT; 1002 goto err_disable_intr; 1003 } 1004 1005 /* Register IB device */ 1006 ret = pvrdma_register_device(dev); 1007 if (ret) { 1008 dev_err(&pdev->dev, "failed to register IB device\n"); 1009 goto err_disable_intr; 1010 } 1011 1012 dev->nb_netdev.notifier_call = pvrdma_netdevice_event; 1013 ret = register_netdevice_notifier(&dev->nb_netdev); 1014 if (ret) { 1015 dev_err(&pdev->dev, "failed to register netdevice events\n"); 1016 goto err_unreg_ibdev; 1017 } 1018 1019 dev_info(&pdev->dev, "attached to device\n"); 1020 return 0; 1021 1022 err_unreg_ibdev: 1023 ib_unregister_device(&dev->ib_dev); 1024 err_disable_intr: 1025 pvrdma_disable_intrs(dev); 1026 kfree(dev->sgid_tbl); 1027 err_free_uar_table: 1028 pvrdma_uar_table_cleanup(dev); 1029 err_free_intrs: 1030 pvrdma_free_irq(dev); 1031 pci_free_irq_vectors(pdev); 1032 err_free_cq_ring: 1033 if (dev->netdev) { 1034 dev_put(dev->netdev); 1035 dev->netdev = NULL; 1036 } 1037 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1038 err_free_async_ring: 1039 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1040 err_free_slots: 1041 pvrdma_free_slots(dev); 1042 err_free_dsr: 1043 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, 1044 dev->dsrbase); 1045 err_uar_unmap: 1046 iounmap(dev->driver_uar.map); 1047 err_unmap_regs: 1048 iounmap(dev->regs); 1049 err_free_resource: 1050 pci_release_regions(pdev); 1051 err_disable_pdev: 1052 pci_disable_device(pdev); 1053 pci_set_drvdata(pdev, NULL); 1054 err_free_device: 1055 mutex_lock(&pvrdma_device_list_lock); 1056 list_del(&dev->device_link); 1057 mutex_unlock(&pvrdma_device_list_lock); 1058 ib_dealloc_device(&dev->ib_dev); 1059 return ret; 1060 } 1061 1062 static void pvrdma_pci_remove(struct pci_dev *pdev) 1063 { 1064 struct pvrdma_dev *dev = pci_get_drvdata(pdev); 1065 1066 if (!dev) 1067 return; 1068 1069 dev_info(&pdev->dev, "detaching from device\n"); 1070 1071 unregister_netdevice_notifier(&dev->nb_netdev); 1072 dev->nb_netdev.notifier_call = NULL; 1073 1074 flush_workqueue(event_wq); 1075 1076 if (dev->netdev) { 1077 dev_put(dev->netdev); 1078 dev->netdev = NULL; 1079 } 1080 1081 /* Unregister ib device */ 1082 ib_unregister_device(&dev->ib_dev); 1083 1084 mutex_lock(&pvrdma_device_list_lock); 1085 list_del(&dev->device_link); 1086 mutex_unlock(&pvrdma_device_list_lock); 1087 1088 pvrdma_disable_intrs(dev); 1089 pvrdma_free_irq(dev); 1090 pci_free_irq_vectors(pdev); 1091 1092 /* Deactivate pvrdma device */ 1093 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); 1094 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1095 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1096 pvrdma_free_slots(dev); 1097 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, 1098 dev->dsrbase); 1099 1100 iounmap(dev->regs); 1101 kfree(dev->sgid_tbl); 1102 kfree(dev->cq_tbl); 1103 kfree(dev->srq_tbl); 1104 kfree(dev->qp_tbl); 1105 pvrdma_uar_table_cleanup(dev); 1106 iounmap(dev->driver_uar.map); 1107 1108 ib_dealloc_device(&dev->ib_dev); 1109 1110 /* Free pci resources */ 1111 pci_release_regions(pdev); 1112 pci_disable_device(pdev); 1113 pci_set_drvdata(pdev, NULL); 1114 } 1115 1116 static const struct pci_device_id pvrdma_pci_table[] = { 1117 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, 1118 { 0 }, 1119 }; 1120 1121 MODULE_DEVICE_TABLE(pci, pvrdma_pci_table); 1122 1123 static struct pci_driver pvrdma_driver = { 1124 .name = DRV_NAME, 1125 .id_table = pvrdma_pci_table, 1126 .probe = pvrdma_pci_probe, 1127 .remove = pvrdma_pci_remove, 1128 }; 1129 1130 static int __init pvrdma_init(void) 1131 { 1132 int err; 1133 1134 event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM); 1135 if (!event_wq) 1136 return -ENOMEM; 1137 1138 err = pci_register_driver(&pvrdma_driver); 1139 if (err) 1140 destroy_workqueue(event_wq); 1141 1142 return err; 1143 } 1144 1145 static void __exit pvrdma_cleanup(void) 1146 { 1147 pci_unregister_driver(&pvrdma_driver); 1148 1149 destroy_workqueue(event_wq); 1150 } 1151 1152 module_init(pvrdma_init); 1153 module_exit(pvrdma_cleanup); 1154 1155 MODULE_AUTHOR("VMware, Inc"); 1156 MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); 1157 MODULE_LICENSE("Dual BSD/GPL"); 1158