1 /* 2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of EITHER the GNU General Public License 6 * version 2 as published by the Free Software Foundation or the BSD 7 * 2-Clause License. This program is distributed in the hope that it 8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED 9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 10 * See the GNU General Public License version 2 for more details at 11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program available in the file COPYING in the main 15 * directory of this source tree. 16 * 17 * The BSD 2-Clause License 18 * 19 * Redistribution and use in source and binary forms, with or 20 * without modification, are permitted provided that the following 21 * conditions are met: 22 * 23 * - Redistributions of source code must retain the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer. 26 * 27 * - Redistributions in binary form must reproduce the above 28 * copyright notice, this list of conditions and the following 29 * disclaimer in the documentation and/or other materials 30 * provided with the distribution. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 43 * OF THE POSSIBILITY OF SUCH DAMAGE. 44 */ 45 46 #include <linux/errno.h> 47 #include <linux/inetdevice.h> 48 #include <linux/init.h> 49 #include <linux/module.h> 50 #include <linux/slab.h> 51 #include <rdma/ib_addr.h> 52 #include <rdma/ib_smi.h> 53 #include <rdma/ib_user_verbs.h> 54 #include <net/addrconf.h> 55 56 #include "pvrdma.h" 57 58 #define DRV_NAME "vmw_pvrdma" 59 #define DRV_VERSION "1.0.1.0-k" 60 61 static DEFINE_MUTEX(pvrdma_device_list_lock); 62 static LIST_HEAD(pvrdma_device_list); 63 static struct workqueue_struct *event_wq; 64 65 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); 66 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 67 68 static ssize_t hca_type_show(struct device *device, 69 struct device_attribute *attr, char *buf) 70 { 71 return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 72 } 73 static DEVICE_ATTR_RO(hca_type); 74 75 static ssize_t hw_rev_show(struct device *device, 76 struct device_attribute *attr, char *buf) 77 { 78 return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); 79 } 80 static DEVICE_ATTR_RO(hw_rev); 81 82 static ssize_t board_id_show(struct device *device, 83 struct device_attribute *attr, char *buf) 84 { 85 return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); 86 } 87 static DEVICE_ATTR_RO(board_id); 88 89 static struct attribute *pvrdma_class_attributes[] = { 90 &dev_attr_hw_rev.attr, 91 &dev_attr_hca_type.attr, 92 &dev_attr_board_id.attr, 93 NULL, 94 }; 95 96 static const struct attribute_group pvrdma_attr_group = { 97 .attrs = pvrdma_class_attributes, 98 }; 99 100 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) 101 { 102 struct pvrdma_dev *dev = 103 container_of(device, struct pvrdma_dev, ib_dev); 104 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n", 105 (int) (dev->dsr->caps.fw_ver >> 32), 106 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, 107 (int) dev->dsr->caps.fw_ver & 0xffff); 108 } 109 110 static int pvrdma_init_device(struct pvrdma_dev *dev) 111 { 112 /* Initialize some device related stuff */ 113 spin_lock_init(&dev->cmd_lock); 114 sema_init(&dev->cmd_sema, 1); 115 atomic_set(&dev->num_qps, 0); 116 atomic_set(&dev->num_srqs, 0); 117 atomic_set(&dev->num_cqs, 0); 118 atomic_set(&dev->num_pds, 0); 119 atomic_set(&dev->num_ahs, 0); 120 121 return 0; 122 } 123 124 static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, 125 struct ib_port_immutable *immutable) 126 { 127 struct pvrdma_dev *dev = to_vdev(ibdev); 128 struct ib_port_attr attr; 129 int err; 130 131 if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) 132 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE; 133 else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2) 134 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 135 136 err = ib_query_port(ibdev, port_num, &attr); 137 if (err) 138 return err; 139 140 immutable->pkey_tbl_len = attr.pkey_tbl_len; 141 immutable->gid_tbl_len = attr.gid_tbl_len; 142 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 143 return 0; 144 } 145 146 static const struct ib_device_ops pvrdma_dev_ops = { 147 .owner = THIS_MODULE, 148 .driver_id = RDMA_DRIVER_VMW_PVRDMA, 149 .uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION, 150 151 .add_gid = pvrdma_add_gid, 152 .alloc_mr = pvrdma_alloc_mr, 153 .alloc_pd = pvrdma_alloc_pd, 154 .alloc_ucontext = pvrdma_alloc_ucontext, 155 .create_ah = pvrdma_create_ah, 156 .create_cq = pvrdma_create_cq, 157 .create_qp = pvrdma_create_qp, 158 .dealloc_pd = pvrdma_dealloc_pd, 159 .dealloc_ucontext = pvrdma_dealloc_ucontext, 160 .del_gid = pvrdma_del_gid, 161 .dereg_mr = pvrdma_dereg_mr, 162 .destroy_ah = pvrdma_destroy_ah, 163 .destroy_cq = pvrdma_destroy_cq, 164 .destroy_qp = pvrdma_destroy_qp, 165 .get_dev_fw_str = pvrdma_get_fw_ver_str, 166 .get_dma_mr = pvrdma_get_dma_mr, 167 .get_link_layer = pvrdma_port_link_layer, 168 .get_port_immutable = pvrdma_port_immutable, 169 .map_mr_sg = pvrdma_map_mr_sg, 170 .mmap = pvrdma_mmap, 171 .modify_port = pvrdma_modify_port, 172 .modify_qp = pvrdma_modify_qp, 173 .poll_cq = pvrdma_poll_cq, 174 .post_recv = pvrdma_post_recv, 175 .post_send = pvrdma_post_send, 176 .query_device = pvrdma_query_device, 177 .query_gid = pvrdma_query_gid, 178 .query_pkey = pvrdma_query_pkey, 179 .query_port = pvrdma_query_port, 180 .query_qp = pvrdma_query_qp, 181 .reg_user_mr = pvrdma_reg_user_mr, 182 .req_notify_cq = pvrdma_req_notify_cq, 183 184 INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), 185 INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq), 186 INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), 187 INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), 188 }; 189 190 static const struct ib_device_ops pvrdma_dev_srq_ops = { 191 .create_srq = pvrdma_create_srq, 192 .destroy_srq = pvrdma_destroy_srq, 193 .modify_srq = pvrdma_modify_srq, 194 .query_srq = pvrdma_query_srq, 195 196 INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq), 197 }; 198 199 static int pvrdma_register_device(struct pvrdma_dev *dev) 200 { 201 int ret = -1; 202 203 dev->ib_dev.node_guid = dev->dsr->caps.node_guid; 204 dev->sys_image_guid = dev->dsr->caps.sys_image_guid; 205 dev->flags = 0; 206 dev->ib_dev.num_comp_vectors = 1; 207 dev->ib_dev.dev.parent = &dev->pdev->dev; 208 209 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 210 dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; 211 212 ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); 213 214 mutex_init(&dev->port_mutex); 215 spin_lock_init(&dev->desc_lock); 216 217 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *), 218 GFP_KERNEL); 219 if (!dev->cq_tbl) 220 return ret; 221 spin_lock_init(&dev->cq_tbl_lock); 222 223 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *), 224 GFP_KERNEL); 225 if (!dev->qp_tbl) 226 goto err_cq_free; 227 spin_lock_init(&dev->qp_tbl_lock); 228 229 /* Check if SRQ is supported by backend */ 230 if (dev->dsr->caps.max_srq) { 231 ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); 232 233 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, 234 sizeof(struct pvrdma_srq *), 235 GFP_KERNEL); 236 if (!dev->srq_tbl) 237 goto err_qp_free; 238 } 239 ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); 240 if (ret) 241 goto err_srq_free; 242 spin_lock_init(&dev->srq_tbl_lock); 243 rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); 244 245 ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev); 246 if (ret) 247 goto err_srq_free; 248 249 dev->ib_active = true; 250 251 return 0; 252 253 err_srq_free: 254 kfree(dev->srq_tbl); 255 err_qp_free: 256 kfree(dev->qp_tbl); 257 err_cq_free: 258 kfree(dev->cq_tbl); 259 260 return ret; 261 } 262 263 static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) 264 { 265 u32 icr = PVRDMA_INTR_CAUSE_RESPONSE; 266 struct pvrdma_dev *dev = dev_id; 267 268 dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); 269 270 if (!dev->pdev->msix_enabled) { 271 /* Legacy intr */ 272 icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); 273 if (icr == 0) 274 return IRQ_NONE; 275 } 276 277 if (icr == PVRDMA_INTR_CAUSE_RESPONSE) 278 complete(&dev->cmd_done); 279 280 return IRQ_HANDLED; 281 } 282 283 static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) 284 { 285 struct pvrdma_qp *qp; 286 unsigned long flags; 287 288 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 289 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; 290 if (qp) 291 refcount_inc(&qp->refcnt); 292 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 293 294 if (qp && qp->ibqp.event_handler) { 295 struct ib_qp *ibqp = &qp->ibqp; 296 struct ib_event e; 297 298 e.device = ibqp->device; 299 e.element.qp = ibqp; 300 e.event = type; /* 1:1 mapping for now. */ 301 ibqp->event_handler(&e, ibqp->qp_context); 302 } 303 if (qp) { 304 if (refcount_dec_and_test(&qp->refcnt)) 305 complete(&qp->free); 306 } 307 } 308 309 static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) 310 { 311 struct pvrdma_cq *cq; 312 unsigned long flags; 313 314 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 315 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; 316 if (cq) 317 refcount_inc(&cq->refcnt); 318 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 319 320 if (cq && cq->ibcq.event_handler) { 321 struct ib_cq *ibcq = &cq->ibcq; 322 struct ib_event e; 323 324 e.device = ibcq->device; 325 e.element.cq = ibcq; 326 e.event = type; /* 1:1 mapping for now. */ 327 ibcq->event_handler(&e, ibcq->cq_context); 328 } 329 if (cq) { 330 if (refcount_dec_and_test(&cq->refcnt)) 331 complete(&cq->free); 332 } 333 } 334 335 static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) 336 { 337 struct pvrdma_srq *srq; 338 unsigned long flags; 339 340 spin_lock_irqsave(&dev->srq_tbl_lock, flags); 341 if (dev->srq_tbl) 342 srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; 343 else 344 srq = NULL; 345 if (srq) 346 refcount_inc(&srq->refcnt); 347 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); 348 349 if (srq && srq->ibsrq.event_handler) { 350 struct ib_srq *ibsrq = &srq->ibsrq; 351 struct ib_event e; 352 353 e.device = ibsrq->device; 354 e.element.srq = ibsrq; 355 e.event = type; /* 1:1 mapping for now. */ 356 ibsrq->event_handler(&e, ibsrq->srq_context); 357 } 358 if (srq) { 359 if (refcount_dec_and_test(&srq->refcnt)) 360 complete(&srq->free); 361 } 362 } 363 364 static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, 365 enum ib_event_type event) 366 { 367 struct ib_event ib_event; 368 369 memset(&ib_event, 0, sizeof(ib_event)); 370 ib_event.device = &dev->ib_dev; 371 ib_event.element.port_num = port; 372 ib_event.event = event; 373 ib_dispatch_event(&ib_event); 374 } 375 376 static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) 377 { 378 if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { 379 dev_warn(&dev->pdev->dev, "event on port %d\n", port); 380 return; 381 } 382 383 pvrdma_dispatch_event(dev, port, type); 384 } 385 386 static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i) 387 { 388 return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr( 389 &dev->async_pdir, 390 PAGE_SIZE + 391 sizeof(struct pvrdma_eqe) * i); 392 } 393 394 static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) 395 { 396 struct pvrdma_dev *dev = dev_id; 397 struct pvrdma_ring *ring = &dev->async_ring_state->rx; 398 int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) * 399 PAGE_SIZE / sizeof(struct pvrdma_eqe); 400 unsigned int head; 401 402 dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n"); 403 404 /* 405 * Don't process events until the IB device is registered. Otherwise 406 * we'll try to ib_dispatch_event() on an invalid device. 407 */ 408 if (!dev->ib_active) 409 return IRQ_HANDLED; 410 411 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 412 struct pvrdma_eqe *eqe; 413 414 eqe = get_eqe(dev, head); 415 416 switch (eqe->type) { 417 case PVRDMA_EVENT_QP_FATAL: 418 case PVRDMA_EVENT_QP_REQ_ERR: 419 case PVRDMA_EVENT_QP_ACCESS_ERR: 420 case PVRDMA_EVENT_COMM_EST: 421 case PVRDMA_EVENT_SQ_DRAINED: 422 case PVRDMA_EVENT_PATH_MIG: 423 case PVRDMA_EVENT_PATH_MIG_ERR: 424 case PVRDMA_EVENT_QP_LAST_WQE_REACHED: 425 pvrdma_qp_event(dev, eqe->info, eqe->type); 426 break; 427 428 case PVRDMA_EVENT_CQ_ERR: 429 pvrdma_cq_event(dev, eqe->info, eqe->type); 430 break; 431 432 case PVRDMA_EVENT_SRQ_ERR: 433 case PVRDMA_EVENT_SRQ_LIMIT_REACHED: 434 pvrdma_srq_event(dev, eqe->info, eqe->type); 435 break; 436 437 case PVRDMA_EVENT_PORT_ACTIVE: 438 case PVRDMA_EVENT_PORT_ERR: 439 case PVRDMA_EVENT_LID_CHANGE: 440 case PVRDMA_EVENT_PKEY_CHANGE: 441 case PVRDMA_EVENT_SM_CHANGE: 442 case PVRDMA_EVENT_CLIENT_REREGISTER: 443 case PVRDMA_EVENT_GID_CHANGE: 444 pvrdma_dev_event(dev, eqe->info, eqe->type); 445 break; 446 447 case PVRDMA_EVENT_DEVICE_FATAL: 448 pvrdma_dev_event(dev, 1, eqe->type); 449 break; 450 451 default: 452 break; 453 } 454 455 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 456 } 457 458 return IRQ_HANDLED; 459 } 460 461 static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev, 462 unsigned int i) 463 { 464 return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr( 465 &dev->cq_pdir, 466 PAGE_SIZE + 467 sizeof(struct pvrdma_cqne) * i); 468 } 469 470 static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) 471 { 472 struct pvrdma_dev *dev = dev_id; 473 struct pvrdma_ring *ring = &dev->cq_ring_state->rx; 474 int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / 475 sizeof(struct pvrdma_cqne); 476 unsigned int head; 477 unsigned long flags; 478 479 dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); 480 481 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 482 struct pvrdma_cqne *cqne; 483 struct pvrdma_cq *cq; 484 485 cqne = get_cqne(dev, head); 486 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 487 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; 488 if (cq) 489 refcount_inc(&cq->refcnt); 490 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 491 492 if (cq && cq->ibcq.comp_handler) 493 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 494 if (cq) { 495 if (refcount_dec_and_test(&cq->refcnt)) 496 complete(&cq->free); 497 } 498 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 499 } 500 501 return IRQ_HANDLED; 502 } 503 504 static void pvrdma_free_irq(struct pvrdma_dev *dev) 505 { 506 int i; 507 508 dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); 509 for (i = 0; i < dev->nr_vectors; i++) 510 free_irq(pci_irq_vector(dev->pdev, i), dev); 511 } 512 513 static void pvrdma_enable_intrs(struct pvrdma_dev *dev) 514 { 515 dev_dbg(&dev->pdev->dev, "enable interrupts\n"); 516 pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0); 517 } 518 519 static void pvrdma_disable_intrs(struct pvrdma_dev *dev) 520 { 521 dev_dbg(&dev->pdev->dev, "disable interrupts\n"); 522 pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); 523 } 524 525 static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) 526 { 527 struct pci_dev *pdev = dev->pdev; 528 int ret = 0, i; 529 530 ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS, 531 PCI_IRQ_MSIX); 532 if (ret < 0) { 533 ret = pci_alloc_irq_vectors(pdev, 1, 1, 534 PCI_IRQ_MSI | PCI_IRQ_LEGACY); 535 if (ret < 0) 536 return ret; 537 } 538 dev->nr_vectors = ret; 539 540 ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler, 541 pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev); 542 if (ret) { 543 dev_err(&dev->pdev->dev, 544 "failed to request interrupt 0\n"); 545 goto out_free_vectors; 546 } 547 548 for (i = 1; i < dev->nr_vectors; i++) { 549 ret = request_irq(pci_irq_vector(dev->pdev, i), 550 i == 1 ? pvrdma_intr1_handler : 551 pvrdma_intrx_handler, 552 0, DRV_NAME, dev); 553 if (ret) { 554 dev_err(&dev->pdev->dev, 555 "failed to request interrupt %d\n", i); 556 goto free_irqs; 557 } 558 } 559 560 return 0; 561 562 free_irqs: 563 while (--i >= 0) 564 free_irq(pci_irq_vector(dev->pdev, i), dev); 565 out_free_vectors: 566 pci_free_irq_vectors(pdev); 567 return ret; 568 } 569 570 static void pvrdma_free_slots(struct pvrdma_dev *dev) 571 { 572 struct pci_dev *pdev = dev->pdev; 573 574 if (dev->resp_slot) 575 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot, 576 dev->dsr->resp_slot_dma); 577 if (dev->cmd_slot) 578 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot, 579 dev->dsr->cmd_slot_dma); 580 } 581 582 static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, 583 const union ib_gid *gid, 584 u8 gid_type, 585 int index) 586 { 587 int ret; 588 union pvrdma_cmd_req req; 589 struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind; 590 591 if (!dev->sgid_tbl) { 592 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 593 return -EINVAL; 594 } 595 596 memset(cmd_bind, 0, sizeof(*cmd_bind)); 597 cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND; 598 memcpy(cmd_bind->new_gid, gid->raw, 16); 599 cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); 600 cmd_bind->vlan = 0xfff; 601 cmd_bind->index = index; 602 cmd_bind->gid_type = gid_type; 603 604 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 605 if (ret < 0) { 606 dev_warn(&dev->pdev->dev, 607 "could not create binding, error: %d\n", ret); 608 return -EFAULT; 609 } 610 memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid)); 611 return 0; 612 } 613 614 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) 615 { 616 struct pvrdma_dev *dev = to_vdev(attr->device); 617 618 return pvrdma_add_gid_at_index(dev, &attr->gid, 619 ib_gid_type_to_pvrdma(attr->gid_type), 620 attr->index); 621 } 622 623 static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) 624 { 625 int ret; 626 union pvrdma_cmd_req req; 627 struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind; 628 629 /* Update sgid table. */ 630 if (!dev->sgid_tbl) { 631 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 632 return -EINVAL; 633 } 634 635 memset(cmd_dest, 0, sizeof(*cmd_dest)); 636 cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND; 637 memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16); 638 cmd_dest->index = index; 639 640 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 641 if (ret < 0) { 642 dev_warn(&dev->pdev->dev, 643 "could not destroy binding, error: %d\n", ret); 644 return ret; 645 } 646 memset(&dev->sgid_tbl[index], 0, 16); 647 return 0; 648 } 649 650 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) 651 { 652 struct pvrdma_dev *dev = to_vdev(attr->device); 653 654 dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", 655 attr->index, dev->netdev->name); 656 657 return pvrdma_del_gid_at_index(dev, attr->index); 658 } 659 660 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, 661 struct net_device *ndev, 662 unsigned long event) 663 { 664 struct pci_dev *pdev_net; 665 unsigned int slot; 666 667 switch (event) { 668 case NETDEV_REBOOT: 669 case NETDEV_DOWN: 670 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); 671 break; 672 case NETDEV_UP: 673 pvrdma_write_reg(dev, PVRDMA_REG_CTL, 674 PVRDMA_DEVICE_CTL_UNQUIESCE); 675 676 mb(); 677 678 if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) 679 dev_err(&dev->pdev->dev, 680 "failed to activate device during link up\n"); 681 else 682 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); 683 break; 684 case NETDEV_UNREGISTER: 685 ib_device_set_netdev(&dev->ib_dev, NULL, 1); 686 dev_put(dev->netdev); 687 dev->netdev = NULL; 688 break; 689 case NETDEV_REGISTER: 690 /* vmxnet3 will have same bus, slot. But func will be 0 */ 691 slot = PCI_SLOT(dev->pdev->devfn); 692 pdev_net = pci_get_slot(dev->pdev->bus, 693 PCI_DEVFN(slot, 0)); 694 if ((dev->netdev == NULL) && 695 (pci_get_drvdata(pdev_net) == ndev)) { 696 /* this is our netdev */ 697 ib_device_set_netdev(&dev->ib_dev, ndev, 1); 698 dev->netdev = ndev; 699 dev_hold(ndev); 700 } 701 pci_dev_put(pdev_net); 702 break; 703 704 default: 705 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 706 event, dev_name(&dev->ib_dev.dev)); 707 break; 708 } 709 } 710 711 static void pvrdma_netdevice_event_work(struct work_struct *work) 712 { 713 struct pvrdma_netdevice_work *netdev_work; 714 struct pvrdma_dev *dev; 715 716 netdev_work = container_of(work, struct pvrdma_netdevice_work, work); 717 718 mutex_lock(&pvrdma_device_list_lock); 719 list_for_each_entry(dev, &pvrdma_device_list, device_link) { 720 if ((netdev_work->event == NETDEV_REGISTER) || 721 (dev->netdev == netdev_work->event_netdev)) { 722 pvrdma_netdevice_event_handle(dev, 723 netdev_work->event_netdev, 724 netdev_work->event); 725 break; 726 } 727 } 728 mutex_unlock(&pvrdma_device_list_lock); 729 730 kfree(netdev_work); 731 } 732 733 static int pvrdma_netdevice_event(struct notifier_block *this, 734 unsigned long event, void *ptr) 735 { 736 struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr); 737 struct pvrdma_netdevice_work *netdev_work; 738 739 netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC); 740 if (!netdev_work) 741 return NOTIFY_BAD; 742 743 INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work); 744 netdev_work->event_netdev = event_netdev; 745 netdev_work->event = event; 746 queue_work(event_wq, &netdev_work->work); 747 748 return NOTIFY_DONE; 749 } 750 751 static int pvrdma_pci_probe(struct pci_dev *pdev, 752 const struct pci_device_id *id) 753 { 754 struct pci_dev *pdev_net; 755 struct pvrdma_dev *dev; 756 int ret; 757 unsigned long start; 758 unsigned long len; 759 dma_addr_t slot_dma = 0; 760 761 dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); 762 763 /* Allocate zero-out device */ 764 dev = ib_alloc_device(pvrdma_dev, ib_dev); 765 if (!dev) { 766 dev_err(&pdev->dev, "failed to allocate IB device\n"); 767 return -ENOMEM; 768 } 769 770 mutex_lock(&pvrdma_device_list_lock); 771 list_add(&dev->device_link, &pvrdma_device_list); 772 mutex_unlock(&pvrdma_device_list_lock); 773 774 ret = pvrdma_init_device(dev); 775 if (ret) 776 goto err_free_device; 777 778 dev->pdev = pdev; 779 pci_set_drvdata(pdev, dev); 780 781 ret = pci_enable_device(pdev); 782 if (ret) { 783 dev_err(&pdev->dev, "cannot enable PCI device\n"); 784 goto err_free_device; 785 } 786 787 dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n", 788 pci_resource_flags(pdev, 0)); 789 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 790 (unsigned long long)pci_resource_len(pdev, 0)); 791 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 792 (unsigned long long)pci_resource_start(pdev, 0)); 793 dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n", 794 pci_resource_flags(pdev, 1)); 795 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 796 (unsigned long long)pci_resource_len(pdev, 1)); 797 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 798 (unsigned long long)pci_resource_start(pdev, 1)); 799 800 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || 801 !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { 802 dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); 803 ret = -ENOMEM; 804 goto err_disable_pdev; 805 } 806 807 ret = pci_request_regions(pdev, DRV_NAME); 808 if (ret) { 809 dev_err(&pdev->dev, "cannot request PCI resources\n"); 810 goto err_disable_pdev; 811 } 812 813 /* Enable 64-Bit DMA */ 814 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { 815 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 816 if (ret != 0) { 817 dev_err(&pdev->dev, 818 "pci_set_consistent_dma_mask failed\n"); 819 goto err_free_resource; 820 } 821 } else { 822 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 823 if (ret != 0) { 824 dev_err(&pdev->dev, 825 "pci_set_dma_mask failed\n"); 826 goto err_free_resource; 827 } 828 } 829 dma_set_max_seg_size(&pdev->dev, UINT_MAX); 830 pci_set_master(pdev); 831 832 /* Map register space */ 833 start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 834 len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 835 dev->regs = ioremap(start, len); 836 if (!dev->regs) { 837 dev_err(&pdev->dev, "register mapping failed\n"); 838 ret = -ENOMEM; 839 goto err_free_resource; 840 } 841 842 /* Setup per-device UAR. */ 843 dev->driver_uar.index = 0; 844 dev->driver_uar.pfn = 845 pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> 846 PAGE_SHIFT; 847 dev->driver_uar.map = 848 ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 849 if (!dev->driver_uar.map) { 850 dev_err(&pdev->dev, "failed to remap UAR pages\n"); 851 ret = -ENOMEM; 852 goto err_unmap_regs; 853 } 854 855 dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); 856 dev_info(&pdev->dev, "device version %d, driver version %d\n", 857 dev->dsr_version, PVRDMA_VERSION); 858 859 dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), 860 &dev->dsrbase, GFP_KERNEL); 861 if (!dev->dsr) { 862 dev_err(&pdev->dev, "failed to allocate shared region\n"); 863 ret = -ENOMEM; 864 goto err_uar_unmap; 865 } 866 867 /* Setup the shared region */ 868 dev->dsr->driver_version = PVRDMA_VERSION; 869 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? 870 PVRDMA_GOS_BITS_32 : 871 PVRDMA_GOS_BITS_64; 872 dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; 873 dev->dsr->gos_info.gos_ver = 1; 874 875 if (dev->dsr_version < PVRDMA_PPN64_VERSION) 876 dev->dsr->uar_pfn = dev->driver_uar.pfn; 877 else 878 dev->dsr->uar_pfn64 = dev->driver_uar.pfn; 879 880 /* Command slot. */ 881 dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 882 &slot_dma, GFP_KERNEL); 883 if (!dev->cmd_slot) { 884 ret = -ENOMEM; 885 goto err_free_dsr; 886 } 887 888 dev->dsr->cmd_slot_dma = (u64)slot_dma; 889 890 /* Response slot. */ 891 dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 892 &slot_dma, GFP_KERNEL); 893 if (!dev->resp_slot) { 894 ret = -ENOMEM; 895 goto err_free_slots; 896 } 897 898 dev->dsr->resp_slot_dma = (u64)slot_dma; 899 900 /* Async event ring */ 901 dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 902 ret = pvrdma_page_dir_init(dev, &dev->async_pdir, 903 dev->dsr->async_ring_pages.num_pages, true); 904 if (ret) 905 goto err_free_slots; 906 dev->async_ring_state = dev->async_pdir.pages[0]; 907 dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; 908 909 /* CQ notification ring */ 910 dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 911 ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, 912 dev->dsr->cq_ring_pages.num_pages, true); 913 if (ret) 914 goto err_free_async_ring; 915 dev->cq_ring_state = dev->cq_pdir.pages[0]; 916 dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma; 917 918 /* 919 * Write the PA of the shared region to the device. The writes must be 920 * ordered such that the high bits are written last. When the writes 921 * complete, the device will have filled out the capabilities. 922 */ 923 924 pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase); 925 pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH, 926 (u32)((u64)(dev->dsrbase) >> 32)); 927 928 /* Make sure the write is complete before reading status. */ 929 mb(); 930 931 /* The driver supports RoCE V1 and V2. */ 932 if (!PVRDMA_SUPPORTED(dev)) { 933 dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n"); 934 ret = -EFAULT; 935 goto err_free_cq_ring; 936 } 937 938 /* Paired vmxnet3 will have same bus, slot. But func will be 0 */ 939 pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); 940 if (!pdev_net) { 941 dev_err(&pdev->dev, "failed to find paired net device\n"); 942 ret = -ENODEV; 943 goto err_free_cq_ring; 944 } 945 946 if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE || 947 pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) { 948 dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n"); 949 pci_dev_put(pdev_net); 950 ret = -ENODEV; 951 goto err_free_cq_ring; 952 } 953 954 dev->netdev = pci_get_drvdata(pdev_net); 955 pci_dev_put(pdev_net); 956 if (!dev->netdev) { 957 dev_err(&pdev->dev, "failed to get vmxnet3 device\n"); 958 ret = -ENODEV; 959 goto err_free_cq_ring; 960 } 961 dev_hold(dev->netdev); 962 963 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); 964 965 /* Interrupt setup */ 966 ret = pvrdma_alloc_intrs(dev); 967 if (ret) { 968 dev_err(&pdev->dev, "failed to allocate interrupts\n"); 969 ret = -ENOMEM; 970 goto err_free_cq_ring; 971 } 972 973 /* Allocate UAR table. */ 974 ret = pvrdma_uar_table_init(dev); 975 if (ret) { 976 dev_err(&pdev->dev, "failed to allocate UAR table\n"); 977 ret = -ENOMEM; 978 goto err_free_intrs; 979 } 980 981 /* Allocate GID table */ 982 dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len, 983 sizeof(union ib_gid), GFP_KERNEL); 984 if (!dev->sgid_tbl) { 985 ret = -ENOMEM; 986 goto err_free_uar_table; 987 } 988 dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len); 989 990 pvrdma_enable_intrs(dev); 991 992 /* Activate pvrdma device */ 993 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE); 994 995 /* Make sure the write is complete before reading status. */ 996 mb(); 997 998 /* Check if device was successfully activated */ 999 ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR); 1000 if (ret != 0) { 1001 dev_err(&pdev->dev, "failed to activate device\n"); 1002 ret = -EFAULT; 1003 goto err_disable_intr; 1004 } 1005 1006 /* Register IB device */ 1007 ret = pvrdma_register_device(dev); 1008 if (ret) { 1009 dev_err(&pdev->dev, "failed to register IB device\n"); 1010 goto err_disable_intr; 1011 } 1012 1013 dev->nb_netdev.notifier_call = pvrdma_netdevice_event; 1014 ret = register_netdevice_notifier(&dev->nb_netdev); 1015 if (ret) { 1016 dev_err(&pdev->dev, "failed to register netdevice events\n"); 1017 goto err_unreg_ibdev; 1018 } 1019 1020 dev_info(&pdev->dev, "attached to device\n"); 1021 return 0; 1022 1023 err_unreg_ibdev: 1024 ib_unregister_device(&dev->ib_dev); 1025 err_disable_intr: 1026 pvrdma_disable_intrs(dev); 1027 kfree(dev->sgid_tbl); 1028 err_free_uar_table: 1029 pvrdma_uar_table_cleanup(dev); 1030 err_free_intrs: 1031 pvrdma_free_irq(dev); 1032 pci_free_irq_vectors(pdev); 1033 err_free_cq_ring: 1034 if (dev->netdev) { 1035 dev_put(dev->netdev); 1036 dev->netdev = NULL; 1037 } 1038 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1039 err_free_async_ring: 1040 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1041 err_free_slots: 1042 pvrdma_free_slots(dev); 1043 err_free_dsr: 1044 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, 1045 dev->dsrbase); 1046 err_uar_unmap: 1047 iounmap(dev->driver_uar.map); 1048 err_unmap_regs: 1049 iounmap(dev->regs); 1050 err_free_resource: 1051 pci_release_regions(pdev); 1052 err_disable_pdev: 1053 pci_disable_device(pdev); 1054 pci_set_drvdata(pdev, NULL); 1055 err_free_device: 1056 mutex_lock(&pvrdma_device_list_lock); 1057 list_del(&dev->device_link); 1058 mutex_unlock(&pvrdma_device_list_lock); 1059 ib_dealloc_device(&dev->ib_dev); 1060 return ret; 1061 } 1062 1063 static void pvrdma_pci_remove(struct pci_dev *pdev) 1064 { 1065 struct pvrdma_dev *dev = pci_get_drvdata(pdev); 1066 1067 if (!dev) 1068 return; 1069 1070 dev_info(&pdev->dev, "detaching from device\n"); 1071 1072 unregister_netdevice_notifier(&dev->nb_netdev); 1073 dev->nb_netdev.notifier_call = NULL; 1074 1075 flush_workqueue(event_wq); 1076 1077 if (dev->netdev) { 1078 dev_put(dev->netdev); 1079 dev->netdev = NULL; 1080 } 1081 1082 /* Unregister ib device */ 1083 ib_unregister_device(&dev->ib_dev); 1084 1085 mutex_lock(&pvrdma_device_list_lock); 1086 list_del(&dev->device_link); 1087 mutex_unlock(&pvrdma_device_list_lock); 1088 1089 pvrdma_disable_intrs(dev); 1090 pvrdma_free_irq(dev); 1091 pci_free_irq_vectors(pdev); 1092 1093 /* Deactivate pvrdma device */ 1094 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); 1095 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1096 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1097 pvrdma_free_slots(dev); 1098 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, 1099 dev->dsrbase); 1100 1101 iounmap(dev->regs); 1102 kfree(dev->sgid_tbl); 1103 kfree(dev->cq_tbl); 1104 kfree(dev->srq_tbl); 1105 kfree(dev->qp_tbl); 1106 pvrdma_uar_table_cleanup(dev); 1107 iounmap(dev->driver_uar.map); 1108 1109 ib_dealloc_device(&dev->ib_dev); 1110 1111 /* Free pci resources */ 1112 pci_release_regions(pdev); 1113 pci_disable_device(pdev); 1114 pci_set_drvdata(pdev, NULL); 1115 } 1116 1117 static const struct pci_device_id pvrdma_pci_table[] = { 1118 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, 1119 { 0 }, 1120 }; 1121 1122 MODULE_DEVICE_TABLE(pci, pvrdma_pci_table); 1123 1124 static struct pci_driver pvrdma_driver = { 1125 .name = DRV_NAME, 1126 .id_table = pvrdma_pci_table, 1127 .probe = pvrdma_pci_probe, 1128 .remove = pvrdma_pci_remove, 1129 }; 1130 1131 static int __init pvrdma_init(void) 1132 { 1133 int err; 1134 1135 event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM); 1136 if (!event_wq) 1137 return -ENOMEM; 1138 1139 err = pci_register_driver(&pvrdma_driver); 1140 if (err) 1141 destroy_workqueue(event_wq); 1142 1143 return err; 1144 } 1145 1146 static void __exit pvrdma_cleanup(void) 1147 { 1148 pci_unregister_driver(&pvrdma_driver); 1149 1150 destroy_workqueue(event_wq); 1151 } 1152 1153 module_init(pvrdma_init); 1154 module_exit(pvrdma_cleanup); 1155 1156 MODULE_AUTHOR("VMware, Inc"); 1157 MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); 1158 MODULE_LICENSE("Dual BSD/GPL"); 1159