/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.0.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context);
static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context);


static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);

static struct device_attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id
};

static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
				  size_t str_len)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, str_len, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize the command channel locks and resource counters. */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
				 struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
					    u8 port_num)
{
	struct net_device *netdev;
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (port_num != 1)
		return NULL;

	rcu_read_lock();
	netdev = dev->netdev;
	if (netdev)
		dev_hold(netdev);
	rcu_read_unlock();

	return netdev;
}

static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;
	int i = 0;

	strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dma_device = &dev->pdev->dev;
	dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_POLL_CQ)		|
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)	|
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
		(1ull << IB_USER_VERBS_CMD_POST_SEND)		|
		(1ull << IB_USER_VERBS_CMD_POST_RECV)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	dev->ib_dev.query_device = pvrdma_query_device;
	dev->ib_dev.query_port = pvrdma_query_port;
	dev->ib_dev.query_gid = pvrdma_query_gid;
	dev->ib_dev.query_pkey = pvrdma_query_pkey;
	dev->ib_dev.modify_port = pvrdma_modify_port;
	dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
	dev->ib_dev.mmap = pvrdma_mmap;
	dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
	dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
	dev->ib_dev.create_ah = pvrdma_create_ah;
	dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
	dev->ib_dev.create_qp = pvrdma_create_qp;
	dev->ib_dev.modify_qp = pvrdma_modify_qp;
	dev->ib_dev.query_qp = pvrdma_query_qp;
	dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
	dev->ib_dev.post_send = pvrdma_post_send;
	dev->ib_dev.post_recv = pvrdma_post_recv;
	dev->ib_dev.create_cq = pvrdma_create_cq;
	dev->ib_dev.modify_cq = pvrdma_modify_cq;
	dev->ib_dev.resize_cq = pvrdma_resize_cq;
	dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
	dev->ib_dev.poll_cq = pvrdma_poll_cq;
	dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
	dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
	dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
	dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
	dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
	dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
	dev->ib_dev.add_gid = pvrdma_add_gid;
	dev->ib_dev.del_gid = pvrdma_del_gid;
	dev->ib_dev.get_netdev = pvrdma_get_netdev;
	dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
	dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
	dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, NULL);
	if (ret)
		goto err_qp_free;

	for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
		ret = device_create_file(&dev->ib_dev.dev,
					 pvrdma_class_attributes[i]);
		if (ret)
			goto err_class;
	}

	dev->ib_active = true;

	return 0;
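
	/*
	 * Error unwind for pvrdma_register_device(): each label below releases
	 * only what was set up before the corresponding failure point, in
	 * reverse order of allocation.
	 */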

err_class:
	ib_unregister_device(&dev->ib_dev);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}

static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		atomic_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		atomic_dec(&qp->refcnt);
		if (atomic_read(&qp->refcnt) == 0)
			wake_up(&qp->wait);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		atomic_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		atomic_dec(&cq->refcnt);
		if (atomic_read(&cq->refcnt) == 0)
			wake_up(&cq->wait);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on invalid port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
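	 * (dev->ib_active is set to true only at the end of
	 * pvrdma_register_device().)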
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;
	unsigned long flags;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock_irqsave(&dev->cq_tbl_lock, flags);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			atomic_inc(&cq->refcnt);
		spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			atomic_dec(&cq->refcnt);
			/* Wake the destroy path once the last reference drops. */
			if (atomic_read(&cq->refcnt) == 0)
				wake_up(&cq->wait);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	return pvrdma_add_gid_at_index(dev, gid, index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, index);
}

static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  unsigned long event)
{
	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev->ib_dev.name);
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if (dev->netdev == netdev_work->event_netdev) {
			pvrdma_netdevice_event_handle(dev, netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}

static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	unsigned int version;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate a zeroed-out device. */
	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_disable_pdev;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_consistent_dma_mask failed\n");
			goto err_free_resource;
		}
	} else {
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret != 0) {
			dev_err(&pdev->dev,
				"pci_set_dma_mask failed\n");
			goto err_free_resource;
		}
	}

	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 version, PVRDMA_VERSION);
	if (version < PVRDMA_VERSION) {
		dev_err(&pdev->dev, "incompatible device version\n");
		ret = -EFAULT;
		goto err_uar_unmap;
	}

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	memset(dev->dsr, 0, sizeof(*dev->dsr));
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;
	dev->dsr->uar_pfn = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = 4;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = 4;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */

	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* Currently, the driver only supports RoCE mode. */
	if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
		dev_err(&pdev->dev, "unsupported transport %d\n",
			dev->dsr->caps.mode);
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Currently, the driver only supports RoCE V1. */
	if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* The paired vmxnet3 device has the same bus and slot, but function 0. */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("Dual BSD/GPL");