/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context);
static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context);

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);

static struct device_attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id
};

static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
				  size_t str_len)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, str_len, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
				 struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
					    u8 port_num)
{
	struct net_device *netdev;
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (port_num != 1)
		return NULL;

	rcu_read_lock();
	netdev = dev->netdev;
	if (netdev)
		dev_hold(netdev);
	rcu_read_unlock();

	return netdev;
}
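
/*
 * pvrdma_register_device - register the device with the IB core.
 *
 * Fills in the ib_device structure (uverbs command mask, verbs entry
 * points, node type and GUIDs taken from the device shared region),
 * allocates the CQ and QP lookup tables, registers the device and
 * creates the sysfs attribute files. Only after this succeeds is
 * dev->ib_active set, which gates async event dispatch.
 */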
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;
	int i = 0;

	strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;
	dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	dev->ib_dev.query_device = pvrdma_query_device;
	dev->ib_dev.query_port = pvrdma_query_port;
	dev->ib_dev.query_gid = pvrdma_query_gid;
	dev->ib_dev.query_pkey = pvrdma_query_pkey;
	dev->ib_dev.modify_port = pvrdma_modify_port;
	dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
	dev->ib_dev.mmap = pvrdma_mmap;
	dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
	dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
	dev->ib_dev.create_ah = pvrdma_create_ah;
	dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
	dev->ib_dev.create_qp = pvrdma_create_qp;
	dev->ib_dev.modify_qp = pvrdma_modify_qp;
	dev->ib_dev.query_qp = pvrdma_query_qp;
	dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
	dev->ib_dev.post_send = pvrdma_post_send;
	dev->ib_dev.post_recv = pvrdma_post_recv;
	dev->ib_dev.create_cq = pvrdma_create_cq;
	dev->ib_dev.modify_cq = pvrdma_modify_cq;
	dev->ib_dev.resize_cq = pvrdma_resize_cq;
	dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
	dev->ib_dev.poll_cq = pvrdma_poll_cq;
	dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
	dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
	dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
	dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
	dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
	dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
	dev->ib_dev.add_gid = pvrdma_add_gid;
	dev->ib_dev.del_gid = pvrdma_del_gid;
	dev->ib_dev.get_netdev = pvrdma_get_netdev;
	dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
	dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
	dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, NULL);
	if (ret)
		goto err_qp_free;

	for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
		ret = device_create_file(&dev->ib_dev.dev,
					 pvrdma_class_attributes[i]);
		if (ret)
			goto err_class;
	}

	dev->ib_active = true;

	return 0;

err_class:
	ib_unregister_device(&dev->ib_dev);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}
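
/*
 * Interrupt 0 is the command response interrupt. For legacy (INTx)
 * interrupts the cause must be read back from the ICR register; with
 * MSI-X the vector itself identifies the cause. A response cause
 * completes dev->cmd_done so the waiting command path can proceed.
 */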
static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		atomic_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		atomic_dec(&qp->refcnt);
		if (atomic_read(&qp->refcnt) == 0)
			wake_up(&qp->wait);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		atomic_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		atomic_dec(&cq->refcnt);
		if (atomic_read(&cq->refcnt) == 0)
			wake_up(&cq->wait);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}
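
/*
 * Interrupt 1 is the asynchronous event interrupt. Events are read
 * from the async event ring (one pvrdma_eqe per slot, with the ring
 * state kept in the first page of the page directory) and dispatched
 * as QP, CQ or port events. The device event codes are a 1:1 mapping
 * of the IB event types, so eqe->type is passed through unchanged.
 */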
static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

/* Interrupt x (completion): forward CQ notifications to the CQ owners. */
static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;
	unsigned long flags;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock_irqsave(&dev->cq_tbl_lock, flags);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			atomic_inc(&cq->refcnt);
		spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			atomic_dec(&cq->refcnt);
			/* Wake waiters only once the last reference is gone,
			 * matching pvrdma_qp_event()/pvrdma_cq_event().
			 */
			if (atomic_read(&cq->refcnt) == 0)
				wake_up(&cq->wait);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}
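
/*
 * Allocate interrupt vectors: prefer MSI-X (up to PVRDMA_MAX_INTERRUPTS
 * vectors), falling back to a single MSI or legacy interrupt. Vector 0
 * handles command responses, vector 1 async events, and any remaining
 * vectors completion notifications.
 */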
static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}
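
/*
 * GID table management. Additions and removals requested by the IB core
 * are forwarded to the device as CREATE_BIND/DESTROY_BIND commands and
 * mirrored in the driver's local sgid_tbl copy.
 */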
static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	return pvrdma_add_gid_at_index(dev, gid, index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s\n",
		index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, index);
}

static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  unsigned long event)
{
	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev->ib_dev.name);
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if (dev->netdev == netdev_work->event_netdev) {
			pvrdma_netdevice_event_handle(dev, netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}
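
/*
 * pvrdma_pci_probe - bring up a PVRDMA function.
 *
 * Probe order: enable the PCI device and map its BARs, allocate and
 * publish the device shared region (DSR) together with the command,
 * response, async event and CQ notification rings, pair with the
 * vmxnet3 netdev in the same PCI slot (function 0), set up interrupts,
 * activate the device, and finally register with the IB core and the
 * netdevice notifier chain.
 */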
static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	unsigned int version;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate zero-out device */
	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_free_device;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_consistent_dma_mask failed\n");
			goto err_free_resource;
		}
	} else {
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_dma_mask failed\n");
			goto err_free_resource;
		}
	}

	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 version, PVRDMA_VERSION);
	if (version < PVRDMA_VERSION) {
		dev_err(&pdev->dev, "incompatible device version\n");
		ret = -EFAULT;
		goto err_uar_unmap;
	}

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	memset(dev->dsr, 0, sizeof(*dev->dsr));
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;
	dev->dsr->uar_pfn = dev->driver_uar.pfn;
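
	/*
	 * The command and response slots are single DMA-coherent pages shared
	 * with the device; their bus addresses are published through the DSR.
	 * Command completion is signalled via interrupt vector 0 (see
	 * pvrdma_intr0_handler()).
	 */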
	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */

	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* Currently, the driver only supports RoCE mode. */
	if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
		dev_err(&pdev->dev, "unsupported transport %d\n",
			dev->dsr->caps.mode);
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Currently, the driver only supports RoCE V1. */
	if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}
	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("Dual BSD/GPL");