1 /* 2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of EITHER the GNU General Public License 6 * version 2 as published by the Free Software Foundation or the BSD 7 * 2-Clause License. This program is distributed in the hope that it 8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED 9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 10 * See the GNU General Public License version 2 for more details at 11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program available in the file COPYING in the main 15 * directory of this source tree. 16 * 17 * The BSD 2-Clause License 18 * 19 * Redistribution and use in source and binary forms, with or 20 * without modification, are permitted provided that the following 21 * conditions are met: 22 * 23 * - Redistributions of source code must retain the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer. 26 * 27 * - Redistributions in binary form must reproduce the above 28 * copyright notice, this list of conditions and the following 29 * disclaimer in the documentation and/or other materials 30 * provided with the distribution. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 43 * OF THE POSSIBILITY OF SUCH DAMAGE. 44 */ 45 46 #include <linux/errno.h> 47 #include <linux/inetdevice.h> 48 #include <linux/init.h> 49 #include <linux/module.h> 50 #include <linux/slab.h> 51 #include <rdma/ib_addr.h> 52 #include <rdma/ib_smi.h> 53 #include <rdma/ib_user_verbs.h> 54 #include <net/addrconf.h> 55 56 #include "pvrdma.h" 57 58 #define DRV_NAME "vmw_pvrdma" 59 #define DRV_VERSION "1.0.1.0-k" 60 61 static DEFINE_MUTEX(pvrdma_device_list_lock); 62 static LIST_HEAD(pvrdma_device_list); 63 static struct workqueue_struct *event_wq; 64 65 static int pvrdma_add_gid(const union ib_gid *gid, 66 const struct ib_gid_attr *attr, 67 void **context); 68 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 69 70 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 71 char *buf) 72 { 73 return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 74 } 75 76 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 77 char *buf) 78 { 79 return sprintf(buf, "%d\n", PVRDMA_REV_ID); 80 } 81 82 static ssize_t show_board(struct device *device, struct device_attribute *attr, 83 char *buf) 84 { 85 return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); 86 } 87 88 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 89 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 90 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 91 92 static struct device_attribute *pvrdma_class_attributes[] = { 93 &dev_attr_hw_rev, 94 &dev_attr_hca_type, 95 &dev_attr_board_id 96 }; 97 98 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) 99 { 100 struct pvrdma_dev *dev = 101 container_of(device, struct pvrdma_dev, ib_dev); 102 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n", 103 (int) (dev->dsr->caps.fw_ver >> 32), 104 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, 105 (int) dev->dsr->caps.fw_ver & 0xffff); 106 } 107 108 static int pvrdma_init_device(struct pvrdma_dev *dev) 109 { 110 /* Initialize some device related stuff */ 111 spin_lock_init(&dev->cmd_lock); 112 sema_init(&dev->cmd_sema, 1); 113 atomic_set(&dev->num_qps, 0); 114 atomic_set(&dev->num_srqs, 0); 115 atomic_set(&dev->num_cqs, 0); 116 atomic_set(&dev->num_pds, 0); 117 atomic_set(&dev->num_ahs, 0); 118 119 return 0; 120 } 121 122 static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, 123 struct ib_port_immutable *immutable) 124 { 125 struct pvrdma_dev *dev = to_vdev(ibdev); 126 struct ib_port_attr attr; 127 int err; 128 129 if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) 130 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE; 131 else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2) 132 immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 133 134 err = ib_query_port(ibdev, port_num, &attr); 135 if (err) 136 return err; 137 138 immutable->pkey_tbl_len = attr.pkey_tbl_len; 139 immutable->gid_tbl_len = attr.gid_tbl_len; 140 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 141 return 0; 142 } 143 144 static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, 145 u8 port_num) 146 { 147 struct net_device *netdev; 148 struct pvrdma_dev *dev = to_vdev(ibdev); 149 150 if (port_num != 1) 151 return NULL; 152 153 rcu_read_lock(); 154 netdev = dev->netdev; 155 if (netdev) 156 dev_hold(netdev); 157 rcu_read_unlock(); 158 159 return netdev; 160 } 161 162 static int pvrdma_register_device(struct pvrdma_dev *dev) 163 { 164 int ret = -1; 165 int i = 0; 166 167 strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX); 168 dev->ib_dev.node_guid = dev->dsr->caps.node_guid; 169 dev->sys_image_guid = dev->dsr->caps.sys_image_guid; 170 dev->flags = 0; 171 dev->ib_dev.owner = THIS_MODULE; 172 dev->ib_dev.num_comp_vectors = 1; 173 dev->ib_dev.dev.parent = &dev->pdev->dev; 174 dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION; 175 dev->ib_dev.uverbs_cmd_mask = 176 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 177 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 178 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 179 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 180 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 181 (1ull << IB_USER_VERBS_CMD_REG_MR) | 182 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 183 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 184 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 185 (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 186 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 187 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 188 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 189 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 190 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 191 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 192 (1ull << IB_USER_VERBS_CMD_POST_SEND) | 193 (1ull << IB_USER_VERBS_CMD_POST_RECV) | 194 (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 195 (1ull << IB_USER_VERBS_CMD_DESTROY_AH); 196 197 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 198 dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; 199 200 dev->ib_dev.query_device = pvrdma_query_device; 201 dev->ib_dev.query_port = pvrdma_query_port; 202 dev->ib_dev.query_gid = pvrdma_query_gid; 203 dev->ib_dev.query_pkey = pvrdma_query_pkey; 204 dev->ib_dev.modify_port = pvrdma_modify_port; 205 dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; 206 dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; 207 dev->ib_dev.mmap = pvrdma_mmap; 208 dev->ib_dev.alloc_pd = pvrdma_alloc_pd; 209 dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; 210 dev->ib_dev.create_ah = pvrdma_create_ah; 211 dev->ib_dev.destroy_ah = pvrdma_destroy_ah; 212 dev->ib_dev.create_qp = pvrdma_create_qp; 213 dev->ib_dev.modify_qp = pvrdma_modify_qp; 214 dev->ib_dev.query_qp = pvrdma_query_qp; 215 dev->ib_dev.destroy_qp = pvrdma_destroy_qp; 216 dev->ib_dev.post_send = pvrdma_post_send; 217 dev->ib_dev.post_recv = pvrdma_post_recv; 218 dev->ib_dev.create_cq = pvrdma_create_cq; 219 dev->ib_dev.modify_cq = pvrdma_modify_cq; 220 dev->ib_dev.resize_cq = pvrdma_resize_cq; 221 dev->ib_dev.destroy_cq = pvrdma_destroy_cq; 222 dev->ib_dev.poll_cq = pvrdma_poll_cq; 223 dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; 224 dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; 225 dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; 226 dev->ib_dev.dereg_mr = pvrdma_dereg_mr; 227 dev->ib_dev.alloc_mr = pvrdma_alloc_mr; 228 dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; 229 dev->ib_dev.add_gid = pvrdma_add_gid; 230 dev->ib_dev.del_gid = pvrdma_del_gid; 231 dev->ib_dev.get_netdev = pvrdma_get_netdev; 232 dev->ib_dev.get_port_immutable = pvrdma_port_immutable; 233 dev->ib_dev.get_link_layer = pvrdma_port_link_layer; 234 dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; 235 236 mutex_init(&dev->port_mutex); 237 spin_lock_init(&dev->desc_lock); 238 239 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *), 240 GFP_KERNEL); 241 if (!dev->cq_tbl) 242 return ret; 243 spin_lock_init(&dev->cq_tbl_lock); 244 245 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *), 246 GFP_KERNEL); 247 if (!dev->qp_tbl) 248 goto err_cq_free; 249 spin_lock_init(&dev->qp_tbl_lock); 250 251 /* Check if SRQ is supported by backend */ 252 if (dev->dsr->caps.max_srq) { 253 dev->ib_dev.uverbs_cmd_mask |= 254 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 255 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 256 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 257 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 258 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 259 260 dev->ib_dev.create_srq = pvrdma_create_srq; 261 dev->ib_dev.modify_srq = pvrdma_modify_srq; 262 dev->ib_dev.query_srq = pvrdma_query_srq; 263 dev->ib_dev.destroy_srq = pvrdma_destroy_srq; 264 dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; 265 266 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, 267 sizeof(struct pvrdma_srq *), 268 GFP_KERNEL); 269 if (!dev->srq_tbl) 270 goto err_qp_free; 271 } 272 dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; 273 spin_lock_init(&dev->srq_tbl_lock); 274 275 ret = ib_register_device(&dev->ib_dev, NULL); 276 if (ret) 277 goto err_srq_free; 278 279 for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { 280 ret = device_create_file(&dev->ib_dev.dev, 281 pvrdma_class_attributes[i]); 282 if (ret) 283 goto err_class; 284 } 285 286 dev->ib_active = true; 287 288 return 0; 289 290 err_class: 291 ib_unregister_device(&dev->ib_dev); 292 err_srq_free: 293 kfree(dev->srq_tbl); 294 err_qp_free: 295 kfree(dev->qp_tbl); 296 err_cq_free: 297 kfree(dev->cq_tbl); 298 299 return ret; 300 } 301 302 static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) 303 { 304 u32 icr = PVRDMA_INTR_CAUSE_RESPONSE; 305 struct pvrdma_dev *dev = dev_id; 306 307 dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); 308 309 if (!dev->pdev->msix_enabled) { 310 /* Legacy intr */ 311 icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); 312 if (icr == 0) 313 return IRQ_NONE; 314 } 315 316 if (icr == PVRDMA_INTR_CAUSE_RESPONSE) 317 complete(&dev->cmd_done); 318 319 return IRQ_HANDLED; 320 } 321 322 static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) 323 { 324 struct pvrdma_qp *qp; 325 unsigned long flags; 326 327 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 328 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; 329 if (qp) 330 refcount_inc(&qp->refcnt); 331 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 332 333 if (qp && qp->ibqp.event_handler) { 334 struct ib_qp *ibqp = &qp->ibqp; 335 struct ib_event e; 336 337 e.device = ibqp->device; 338 e.element.qp = ibqp; 339 e.event = type; /* 1:1 mapping for now. */ 340 ibqp->event_handler(&e, ibqp->qp_context); 341 } 342 if (qp) { 343 if (refcount_dec_and_test(&qp->refcnt)) 344 complete(&qp->free); 345 } 346 } 347 348 static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) 349 { 350 struct pvrdma_cq *cq; 351 unsigned long flags; 352 353 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 354 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; 355 if (cq) 356 refcount_inc(&cq->refcnt); 357 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 358 359 if (cq && cq->ibcq.event_handler) { 360 struct ib_cq *ibcq = &cq->ibcq; 361 struct ib_event e; 362 363 e.device = ibcq->device; 364 e.element.cq = ibcq; 365 e.event = type; /* 1:1 mapping for now. */ 366 ibcq->event_handler(&e, ibcq->cq_context); 367 } 368 if (cq) { 369 if (refcount_dec_and_test(&cq->refcnt)) 370 complete(&cq->free); 371 } 372 } 373 374 static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) 375 { 376 struct pvrdma_srq *srq; 377 unsigned long flags; 378 379 spin_lock_irqsave(&dev->srq_tbl_lock, flags); 380 if (dev->srq_tbl) 381 srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; 382 else 383 srq = NULL; 384 if (srq) 385 refcount_inc(&srq->refcnt); 386 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); 387 388 if (srq && srq->ibsrq.event_handler) { 389 struct ib_srq *ibsrq = &srq->ibsrq; 390 struct ib_event e; 391 392 e.device = ibsrq->device; 393 e.element.srq = ibsrq; 394 e.event = type; /* 1:1 mapping for now. */ 395 ibsrq->event_handler(&e, ibsrq->srq_context); 396 } 397 if (srq) { 398 if (refcount_dec_and_test(&srq->refcnt)) 399 complete(&srq->free); 400 } 401 } 402 403 static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, 404 enum ib_event_type event) 405 { 406 struct ib_event ib_event; 407 408 memset(&ib_event, 0, sizeof(ib_event)); 409 ib_event.device = &dev->ib_dev; 410 ib_event.element.port_num = port; 411 ib_event.event = event; 412 ib_dispatch_event(&ib_event); 413 } 414 415 static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) 416 { 417 if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { 418 dev_warn(&dev->pdev->dev, "event on port %d\n", port); 419 return; 420 } 421 422 pvrdma_dispatch_event(dev, port, type); 423 } 424 425 static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i) 426 { 427 return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr( 428 &dev->async_pdir, 429 PAGE_SIZE + 430 sizeof(struct pvrdma_eqe) * i); 431 } 432 433 static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) 434 { 435 struct pvrdma_dev *dev = dev_id; 436 struct pvrdma_ring *ring = &dev->async_ring_state->rx; 437 int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) * 438 PAGE_SIZE / sizeof(struct pvrdma_eqe); 439 unsigned int head; 440 441 dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n"); 442 443 /* 444 * Don't process events until the IB device is registered. Otherwise 445 * we'll try to ib_dispatch_event() on an invalid device. 446 */ 447 if (!dev->ib_active) 448 return IRQ_HANDLED; 449 450 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 451 struct pvrdma_eqe *eqe; 452 453 eqe = get_eqe(dev, head); 454 455 switch (eqe->type) { 456 case PVRDMA_EVENT_QP_FATAL: 457 case PVRDMA_EVENT_QP_REQ_ERR: 458 case PVRDMA_EVENT_QP_ACCESS_ERR: 459 case PVRDMA_EVENT_COMM_EST: 460 case PVRDMA_EVENT_SQ_DRAINED: 461 case PVRDMA_EVENT_PATH_MIG: 462 case PVRDMA_EVENT_PATH_MIG_ERR: 463 case PVRDMA_EVENT_QP_LAST_WQE_REACHED: 464 pvrdma_qp_event(dev, eqe->info, eqe->type); 465 break; 466 467 case PVRDMA_EVENT_CQ_ERR: 468 pvrdma_cq_event(dev, eqe->info, eqe->type); 469 break; 470 471 case PVRDMA_EVENT_SRQ_ERR: 472 case PVRDMA_EVENT_SRQ_LIMIT_REACHED: 473 pvrdma_srq_event(dev, eqe->info, eqe->type); 474 break; 475 476 case PVRDMA_EVENT_PORT_ACTIVE: 477 case PVRDMA_EVENT_PORT_ERR: 478 case PVRDMA_EVENT_LID_CHANGE: 479 case PVRDMA_EVENT_PKEY_CHANGE: 480 case PVRDMA_EVENT_SM_CHANGE: 481 case PVRDMA_EVENT_CLIENT_REREGISTER: 482 case PVRDMA_EVENT_GID_CHANGE: 483 pvrdma_dev_event(dev, eqe->info, eqe->type); 484 break; 485 486 case PVRDMA_EVENT_DEVICE_FATAL: 487 pvrdma_dev_event(dev, 1, eqe->type); 488 break; 489 490 default: 491 break; 492 } 493 494 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 495 } 496 497 return IRQ_HANDLED; 498 } 499 500 static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev, 501 unsigned int i) 502 { 503 return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr( 504 &dev->cq_pdir, 505 PAGE_SIZE + 506 sizeof(struct pvrdma_cqne) * i); 507 } 508 509 static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) 510 { 511 struct pvrdma_dev *dev = dev_id; 512 struct pvrdma_ring *ring = &dev->cq_ring_state->rx; 513 int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / 514 sizeof(struct pvrdma_cqne); 515 unsigned int head; 516 unsigned long flags; 517 518 dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); 519 520 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) { 521 struct pvrdma_cqne *cqne; 522 struct pvrdma_cq *cq; 523 524 cqne = get_cqne(dev, head); 525 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 526 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; 527 if (cq) 528 refcount_inc(&cq->refcnt); 529 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 530 531 if (cq && cq->ibcq.comp_handler) 532 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 533 if (cq) { 534 if (refcount_dec_and_test(&cq->refcnt)) 535 complete(&cq->free); 536 } 537 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 538 } 539 540 return IRQ_HANDLED; 541 } 542 543 static void pvrdma_free_irq(struct pvrdma_dev *dev) 544 { 545 int i; 546 547 dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); 548 for (i = 0; i < dev->nr_vectors; i++) 549 free_irq(pci_irq_vector(dev->pdev, i), dev); 550 } 551 552 static void pvrdma_enable_intrs(struct pvrdma_dev *dev) 553 { 554 dev_dbg(&dev->pdev->dev, "enable interrupts\n"); 555 pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0); 556 } 557 558 static void pvrdma_disable_intrs(struct pvrdma_dev *dev) 559 { 560 dev_dbg(&dev->pdev->dev, "disable interrupts\n"); 561 pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); 562 } 563 564 static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) 565 { 566 struct pci_dev *pdev = dev->pdev; 567 int ret = 0, i; 568 569 ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS, 570 PCI_IRQ_MSIX); 571 if (ret < 0) { 572 ret = pci_alloc_irq_vectors(pdev, 1, 1, 573 PCI_IRQ_MSI | PCI_IRQ_LEGACY); 574 if (ret < 0) 575 return ret; 576 } 577 dev->nr_vectors = ret; 578 579 ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler, 580 pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev); 581 if (ret) { 582 dev_err(&dev->pdev->dev, 583 "failed to request interrupt 0\n"); 584 goto out_free_vectors; 585 } 586 587 for (i = 1; i < dev->nr_vectors; i++) { 588 ret = request_irq(pci_irq_vector(dev->pdev, i), 589 i == 1 ? pvrdma_intr1_handler : 590 pvrdma_intrx_handler, 591 0, DRV_NAME, dev); 592 if (ret) { 593 dev_err(&dev->pdev->dev, 594 "failed to request interrupt %d\n", i); 595 goto free_irqs; 596 } 597 } 598 599 return 0; 600 601 free_irqs: 602 while (--i >= 0) 603 free_irq(pci_irq_vector(dev->pdev, i), dev); 604 out_free_vectors: 605 pci_free_irq_vectors(pdev); 606 return ret; 607 } 608 609 static void pvrdma_free_slots(struct pvrdma_dev *dev) 610 { 611 struct pci_dev *pdev = dev->pdev; 612 613 if (dev->resp_slot) 614 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot, 615 dev->dsr->resp_slot_dma); 616 if (dev->cmd_slot) 617 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot, 618 dev->dsr->cmd_slot_dma); 619 } 620 621 static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, 622 const union ib_gid *gid, 623 u8 gid_type, 624 int index) 625 { 626 int ret; 627 union pvrdma_cmd_req req; 628 struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind; 629 630 if (!dev->sgid_tbl) { 631 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 632 return -EINVAL; 633 } 634 635 memset(cmd_bind, 0, sizeof(*cmd_bind)); 636 cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND; 637 memcpy(cmd_bind->new_gid, gid->raw, 16); 638 cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); 639 cmd_bind->vlan = 0xfff; 640 cmd_bind->index = index; 641 cmd_bind->gid_type = gid_type; 642 643 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 644 if (ret < 0) { 645 dev_warn(&dev->pdev->dev, 646 "could not create binding, error: %d\n", ret); 647 return -EFAULT; 648 } 649 memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid)); 650 return 0; 651 } 652 653 static int pvrdma_add_gid(const union ib_gid *gid, 654 const struct ib_gid_attr *attr, 655 void **context) 656 { 657 struct pvrdma_dev *dev = to_vdev(attr->device); 658 659 return pvrdma_add_gid_at_index(dev, gid, 660 ib_gid_type_to_pvrdma(attr->gid_type), 661 attr->index); 662 } 663 664 static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) 665 { 666 int ret; 667 union pvrdma_cmd_req req; 668 struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind; 669 670 /* Update sgid table. */ 671 if (!dev->sgid_tbl) { 672 dev_warn(&dev->pdev->dev, "sgid table not initialized\n"); 673 return -EINVAL; 674 } 675 676 memset(cmd_dest, 0, sizeof(*cmd_dest)); 677 cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND; 678 memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16); 679 cmd_dest->index = index; 680 681 ret = pvrdma_cmd_post(dev, &req, NULL, 0); 682 if (ret < 0) { 683 dev_warn(&dev->pdev->dev, 684 "could not destroy binding, error: %d\n", ret); 685 return ret; 686 } 687 memset(&dev->sgid_tbl[index], 0, 16); 688 return 0; 689 } 690 691 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) 692 { 693 struct pvrdma_dev *dev = to_vdev(attr->device); 694 695 dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s", 696 attr->index, dev->netdev->name); 697 698 return pvrdma_del_gid_at_index(dev, attr->index); 699 } 700 701 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, 702 unsigned long event) 703 { 704 switch (event) { 705 case NETDEV_REBOOT: 706 case NETDEV_DOWN: 707 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); 708 break; 709 case NETDEV_UP: 710 pvrdma_write_reg(dev, PVRDMA_REG_CTL, 711 PVRDMA_DEVICE_CTL_UNQUIESCE); 712 713 mb(); 714 715 if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) 716 dev_err(&dev->pdev->dev, 717 "failed to activate device during link up\n"); 718 else 719 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); 720 break; 721 default: 722 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 723 event, dev->ib_dev.name); 724 break; 725 } 726 } 727 728 static void pvrdma_netdevice_event_work(struct work_struct *work) 729 { 730 struct pvrdma_netdevice_work *netdev_work; 731 struct pvrdma_dev *dev; 732 733 netdev_work = container_of(work, struct pvrdma_netdevice_work, work); 734 735 mutex_lock(&pvrdma_device_list_lock); 736 list_for_each_entry(dev, &pvrdma_device_list, device_link) { 737 if (dev->netdev == netdev_work->event_netdev) { 738 pvrdma_netdevice_event_handle(dev, netdev_work->event); 739 break; 740 } 741 } 742 mutex_unlock(&pvrdma_device_list_lock); 743 744 kfree(netdev_work); 745 } 746 747 static int pvrdma_netdevice_event(struct notifier_block *this, 748 unsigned long event, void *ptr) 749 { 750 struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr); 751 struct pvrdma_netdevice_work *netdev_work; 752 753 netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC); 754 if (!netdev_work) 755 return NOTIFY_BAD; 756 757 INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work); 758 netdev_work->event_netdev = event_netdev; 759 netdev_work->event = event; 760 queue_work(event_wq, &netdev_work->work); 761 762 return NOTIFY_DONE; 763 } 764 765 static int pvrdma_pci_probe(struct pci_dev *pdev, 766 const struct pci_device_id *id) 767 { 768 struct pci_dev *pdev_net; 769 struct pvrdma_dev *dev; 770 int ret; 771 unsigned long start; 772 unsigned long len; 773 dma_addr_t slot_dma = 0; 774 775 dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); 776 777 /* Allocate zero-out device */ 778 dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev)); 779 if (!dev) { 780 dev_err(&pdev->dev, "failed to allocate IB device\n"); 781 return -ENOMEM; 782 } 783 784 mutex_lock(&pvrdma_device_list_lock); 785 list_add(&dev->device_link, &pvrdma_device_list); 786 mutex_unlock(&pvrdma_device_list_lock); 787 788 ret = pvrdma_init_device(dev); 789 if (ret) 790 goto err_free_device; 791 792 dev->pdev = pdev; 793 pci_set_drvdata(pdev, dev); 794 795 ret = pci_enable_device(pdev); 796 if (ret) { 797 dev_err(&pdev->dev, "cannot enable PCI device\n"); 798 goto err_free_device; 799 } 800 801 dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n", 802 pci_resource_flags(pdev, 0)); 803 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 804 (unsigned long long)pci_resource_len(pdev, 0)); 805 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 806 (unsigned long long)pci_resource_start(pdev, 0)); 807 dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n", 808 pci_resource_flags(pdev, 1)); 809 dev_dbg(&pdev->dev, "PCI resource len %#llx\n", 810 (unsigned long long)pci_resource_len(pdev, 1)); 811 dev_dbg(&pdev->dev, "PCI resource start %#llx\n", 812 (unsigned long long)pci_resource_start(pdev, 1)); 813 814 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || 815 !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { 816 dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); 817 ret = -ENOMEM; 818 goto err_free_device; 819 } 820 821 ret = pci_request_regions(pdev, DRV_NAME); 822 if (ret) { 823 dev_err(&pdev->dev, "cannot request PCI resources\n"); 824 goto err_disable_pdev; 825 } 826 827 /* Enable 64-Bit DMA */ 828 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { 829 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 830 if (ret != 0) { 831 dev_err(&pdev->dev, 832 "pci_set_consistent_dma_mask failed\n"); 833 goto err_free_resource; 834 } 835 } else { 836 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 837 if (ret != 0) { 838 dev_err(&pdev->dev, 839 "pci_set_dma_mask failed\n"); 840 goto err_free_resource; 841 } 842 } 843 844 pci_set_master(pdev); 845 846 /* Map register space */ 847 start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 848 len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG); 849 dev->regs = ioremap(start, len); 850 if (!dev->regs) { 851 dev_err(&pdev->dev, "register mapping failed\n"); 852 ret = -ENOMEM; 853 goto err_free_resource; 854 } 855 856 /* Setup per-device UAR. */ 857 dev->driver_uar.index = 0; 858 dev->driver_uar.pfn = 859 pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >> 860 PAGE_SHIFT; 861 dev->driver_uar.map = 862 ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 863 if (!dev->driver_uar.map) { 864 dev_err(&pdev->dev, "failed to remap UAR pages\n"); 865 ret = -ENOMEM; 866 goto err_unmap_regs; 867 } 868 869 dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); 870 dev_info(&pdev->dev, "device version %d, driver version %d\n", 871 dev->dsr_version, PVRDMA_VERSION); 872 873 dev->dsr = dma_zalloc_coherent(&pdev->dev, sizeof(*dev->dsr), 874 &dev->dsrbase, GFP_KERNEL); 875 if (!dev->dsr) { 876 dev_err(&pdev->dev, "failed to allocate shared region\n"); 877 ret = -ENOMEM; 878 goto err_uar_unmap; 879 } 880 881 /* Setup the shared region */ 882 dev->dsr->driver_version = PVRDMA_VERSION; 883 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? 884 PVRDMA_GOS_BITS_32 : 885 PVRDMA_GOS_BITS_64; 886 dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; 887 dev->dsr->gos_info.gos_ver = 1; 888 dev->dsr->uar_pfn = dev->driver_uar.pfn; 889 890 /* Command slot. */ 891 dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 892 &slot_dma, GFP_KERNEL); 893 if (!dev->cmd_slot) { 894 ret = -ENOMEM; 895 goto err_free_dsr; 896 } 897 898 dev->dsr->cmd_slot_dma = (u64)slot_dma; 899 900 /* Response slot. */ 901 dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, 902 &slot_dma, GFP_KERNEL); 903 if (!dev->resp_slot) { 904 ret = -ENOMEM; 905 goto err_free_slots; 906 } 907 908 dev->dsr->resp_slot_dma = (u64)slot_dma; 909 910 /* Async event ring */ 911 dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 912 ret = pvrdma_page_dir_init(dev, &dev->async_pdir, 913 dev->dsr->async_ring_pages.num_pages, true); 914 if (ret) 915 goto err_free_slots; 916 dev->async_ring_state = dev->async_pdir.pages[0]; 917 dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; 918 919 /* CQ notification ring */ 920 dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; 921 ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, 922 dev->dsr->cq_ring_pages.num_pages, true); 923 if (ret) 924 goto err_free_async_ring; 925 dev->cq_ring_state = dev->cq_pdir.pages[0]; 926 dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma; 927 928 /* 929 * Write the PA of the shared region to the device. The writes must be 930 * ordered such that the high bits are written last. When the writes 931 * complete, the device will have filled out the capabilities. 932 */ 933 934 pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase); 935 pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH, 936 (u32)((u64)(dev->dsrbase) >> 32)); 937 938 /* Make sure the write is complete before reading status. */ 939 mb(); 940 941 /* The driver supports RoCE V1 and V2. */ 942 if (!PVRDMA_SUPPORTED(dev)) { 943 dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n"); 944 ret = -EFAULT; 945 goto err_free_cq_ring; 946 } 947 948 /* Paired vmxnet3 will have same bus, slot. But func will be 0 */ 949 pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); 950 if (!pdev_net) { 951 dev_err(&pdev->dev, "failed to find paired net device\n"); 952 ret = -ENODEV; 953 goto err_free_cq_ring; 954 } 955 956 if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE || 957 pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) { 958 dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n"); 959 pci_dev_put(pdev_net); 960 ret = -ENODEV; 961 goto err_free_cq_ring; 962 } 963 964 dev->netdev = pci_get_drvdata(pdev_net); 965 pci_dev_put(pdev_net); 966 if (!dev->netdev) { 967 dev_err(&pdev->dev, "failed to get vmxnet3 device\n"); 968 ret = -ENODEV; 969 goto err_free_cq_ring; 970 } 971 972 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); 973 974 /* Interrupt setup */ 975 ret = pvrdma_alloc_intrs(dev); 976 if (ret) { 977 dev_err(&pdev->dev, "failed to allocate interrupts\n"); 978 ret = -ENOMEM; 979 goto err_free_cq_ring; 980 } 981 982 /* Allocate UAR table. */ 983 ret = pvrdma_uar_table_init(dev); 984 if (ret) { 985 dev_err(&pdev->dev, "failed to allocate UAR table\n"); 986 ret = -ENOMEM; 987 goto err_free_intrs; 988 } 989 990 /* Allocate GID table */ 991 dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len, 992 sizeof(union ib_gid), GFP_KERNEL); 993 if (!dev->sgid_tbl) { 994 ret = -ENOMEM; 995 goto err_free_uar_table; 996 } 997 dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len); 998 999 pvrdma_enable_intrs(dev); 1000 1001 /* Activate pvrdma device */ 1002 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE); 1003 1004 /* Make sure the write is complete before reading status. */ 1005 mb(); 1006 1007 /* Check if device was successfully activated */ 1008 ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR); 1009 if (ret != 0) { 1010 dev_err(&pdev->dev, "failed to activate device\n"); 1011 ret = -EFAULT; 1012 goto err_disable_intr; 1013 } 1014 1015 /* Register IB device */ 1016 ret = pvrdma_register_device(dev); 1017 if (ret) { 1018 dev_err(&pdev->dev, "failed to register IB device\n"); 1019 goto err_disable_intr; 1020 } 1021 1022 dev->nb_netdev.notifier_call = pvrdma_netdevice_event; 1023 ret = register_netdevice_notifier(&dev->nb_netdev); 1024 if (ret) { 1025 dev_err(&pdev->dev, "failed to register netdevice events\n"); 1026 goto err_unreg_ibdev; 1027 } 1028 1029 dev_info(&pdev->dev, "attached to device\n"); 1030 return 0; 1031 1032 err_unreg_ibdev: 1033 ib_unregister_device(&dev->ib_dev); 1034 err_disable_intr: 1035 pvrdma_disable_intrs(dev); 1036 kfree(dev->sgid_tbl); 1037 err_free_uar_table: 1038 pvrdma_uar_table_cleanup(dev); 1039 err_free_intrs: 1040 pvrdma_free_irq(dev); 1041 pci_free_irq_vectors(pdev); 1042 err_free_cq_ring: 1043 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1044 err_free_async_ring: 1045 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1046 err_free_slots: 1047 pvrdma_free_slots(dev); 1048 err_free_dsr: 1049 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, 1050 dev->dsrbase); 1051 err_uar_unmap: 1052 iounmap(dev->driver_uar.map); 1053 err_unmap_regs: 1054 iounmap(dev->regs); 1055 err_free_resource: 1056 pci_release_regions(pdev); 1057 err_disable_pdev: 1058 pci_disable_device(pdev); 1059 pci_set_drvdata(pdev, NULL); 1060 err_free_device: 1061 mutex_lock(&pvrdma_device_list_lock); 1062 list_del(&dev->device_link); 1063 mutex_unlock(&pvrdma_device_list_lock); 1064 ib_dealloc_device(&dev->ib_dev); 1065 return ret; 1066 } 1067 1068 static void pvrdma_pci_remove(struct pci_dev *pdev) 1069 { 1070 struct pvrdma_dev *dev = pci_get_drvdata(pdev); 1071 1072 if (!dev) 1073 return; 1074 1075 dev_info(&pdev->dev, "detaching from device\n"); 1076 1077 unregister_netdevice_notifier(&dev->nb_netdev); 1078 dev->nb_netdev.notifier_call = NULL; 1079 1080 flush_workqueue(event_wq); 1081 1082 /* Unregister ib device */ 1083 ib_unregister_device(&dev->ib_dev); 1084 1085 mutex_lock(&pvrdma_device_list_lock); 1086 list_del(&dev->device_link); 1087 mutex_unlock(&pvrdma_device_list_lock); 1088 1089 pvrdma_disable_intrs(dev); 1090 pvrdma_free_irq(dev); 1091 pci_free_irq_vectors(pdev); 1092 1093 /* Deactivate pvrdma device */ 1094 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); 1095 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1096 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1097 pvrdma_free_slots(dev); 1098 1099 iounmap(dev->regs); 1100 kfree(dev->sgid_tbl); 1101 kfree(dev->cq_tbl); 1102 kfree(dev->srq_tbl); 1103 kfree(dev->qp_tbl); 1104 pvrdma_uar_table_cleanup(dev); 1105 iounmap(dev->driver_uar.map); 1106 1107 ib_dealloc_device(&dev->ib_dev); 1108 1109 /* Free pci resources */ 1110 pci_release_regions(pdev); 1111 pci_disable_device(pdev); 1112 pci_set_drvdata(pdev, NULL); 1113 } 1114 1115 static const struct pci_device_id pvrdma_pci_table[] = { 1116 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, 1117 { 0 }, 1118 }; 1119 1120 MODULE_DEVICE_TABLE(pci, pvrdma_pci_table); 1121 1122 static struct pci_driver pvrdma_driver = { 1123 .name = DRV_NAME, 1124 .id_table = pvrdma_pci_table, 1125 .probe = pvrdma_pci_probe, 1126 .remove = pvrdma_pci_remove, 1127 }; 1128 1129 static int __init pvrdma_init(void) 1130 { 1131 int err; 1132 1133 event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM); 1134 if (!event_wq) 1135 return -ENOMEM; 1136 1137 err = pci_register_driver(&pvrdma_driver); 1138 if (err) 1139 destroy_workqueue(event_wq); 1140 1141 return err; 1142 } 1143 1144 static void __exit pvrdma_cleanup(void) 1145 { 1146 pci_unregister_driver(&pvrdma_driver); 1147 1148 destroy_workqueue(event_wq); 1149 } 1150 1151 module_init(pvrdma_init); 1152 module_exit(pvrdma_cleanup); 1153 1154 MODULE_AUTHOR("VMware, Inc"); 1155 MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); 1156 MODULE_LICENSE("Dual BSD/GPL"); 1157