1 /* 2 * QEMU paravirtual RDMA 3 * 4 * Copyright (C) 2018 Oracle 5 * Copyright (C) 2018 Red Hat Inc 6 * 7 * Authors: 8 * Yuval Shaia <yuval.shaia@oracle.com> 9 * Marcel Apfelbaum <marcel@redhat.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2 or later. 12 * See the COPYING file in the top-level directory. 13 * 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qapi/error.h" 18 #include "qemu/module.h" 19 #include "hw/pci/pci.h" 20 #include "hw/pci/pci_ids.h" 21 #include "hw/pci/msi.h" 22 #include "hw/pci/msix.h" 23 #include "hw/qdev-properties.h" 24 #include "hw/qdev-properties-system.h" 25 #include "cpu.h" 26 #include "trace.h" 27 #include "monitor/monitor.h" 28 #include "hw/rdma/rdma.h" 29 30 #include "../rdma_rm.h" 31 #include "../rdma_backend.h" 32 #include "../rdma_utils.h" 33 34 #include <infiniband/verbs.h> 35 #include "pvrdma.h" 36 #include "standard-headers/rdma/vmw_pvrdma-abi.h" 37 #include "sysemu/runstate.h" 38 #include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" 39 #include "pvrdma_qp_ops.h" 40 41 static Property pvrdma_dev_properties[] = { 42 DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), 43 DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), 44 DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), 45 DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, 46 MAX_MR_SIZE), 47 DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), 48 DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ), 49 DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR), 50 DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD), 51 DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom, 52 MAX_QP_RD_ATOM), 53 DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, 54 dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), 55 DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), 56 DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), 57 DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), 58 DEFINE_PROP_END_OF_LIST(), 59 }; 60 61 static void pvrdma_format_statistics(RdmaProvider *obj, GString *buf) 62 { 63 PVRDMADev *dev = PVRDMA_DEV(obj); 64 PCIDevice *pdev = PCI_DEVICE(dev); 65 66 g_string_append_printf(buf, "%s, %x.%x\n", 67 pdev->name, PCI_SLOT(pdev->devfn), 68 PCI_FUNC(pdev->devfn)); 69 g_string_append_printf(buf, "\tcommands : %" PRId64 "\n", 70 dev->stats.commands); 71 g_string_append_printf(buf, "\tregs_reads : %" PRId64 "\n", 72 dev->stats.regs_reads); 73 g_string_append_printf(buf, "\tregs_writes : %" PRId64 "\n", 74 dev->stats.regs_writes); 75 g_string_append_printf(buf, "\tuar_writes : %" PRId64 "\n", 76 dev->stats.uar_writes); 77 g_string_append_printf(buf, "\tinterrupts : %" PRId64 "\n", 78 dev->stats.interrupts); 79 rdma_format_device_counters(&dev->rdma_dev_res, buf); 80 } 81 82 static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, 83 void *ring_state) 84 { 85 pvrdma_ring_free(ring); 86 rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE); 87 } 88 89 static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state, 90 const char *name, PCIDevice *pci_dev, 91 dma_addr_t dir_addr, uint32_t num_pages) 92 { 93 uint64_t *dir, *tbl; 94 int rc = 0; 95 96 if (!num_pages) { 97 rdma_error_report("Ring pages count must be strictly positive"); 98 return -EINVAL; 99 } 100 101 dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); 102 if (!dir) { 103 rdma_error_report("Failed to map to page directory (ring %s)", name); 104 rc = -ENOMEM; 105 goto out; 106 } 107 tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); 108 if (!tbl) { 109 rdma_error_report("Failed to map to page table (ring %s)", name); 110 rc = -ENOMEM; 111 goto out_free_dir; 112 } 113 114 *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); 115 if (!*ring_state) { 116 rdma_error_report("Failed to map to ring state (ring %s)", name); 117 rc = -ENOMEM; 118 goto out_free_tbl; 119 } 120 /* RX ring is the second */ 121 (*ring_state)++; 122 rc = pvrdma_ring_init(ring, name, pci_dev, 123 (PvrdmaRingState *)*ring_state, 124 (num_pages - 1) * TARGET_PAGE_SIZE / 125 sizeof(struct pvrdma_cqne), 126 sizeof(struct pvrdma_cqne), 127 (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1); 128 if (rc) { 129 rc = -ENOMEM; 130 goto out_free_ring_state; 131 } 132 133 goto out_free_tbl; 134 135 out_free_ring_state: 136 rdma_pci_dma_unmap(pci_dev, *ring_state, TARGET_PAGE_SIZE); 137 138 out_free_tbl: 139 rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); 140 141 out_free_dir: 142 rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); 143 144 out: 145 return rc; 146 } 147 148 static void free_dsr(PVRDMADev *dev) 149 { 150 PCIDevice *pci_dev = PCI_DEVICE(dev); 151 152 if (!dev->dsr_info.dsr) { 153 return; 154 } 155 156 free_dev_ring(pci_dev, &dev->dsr_info.async, 157 dev->dsr_info.async_ring_state); 158 159 free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state); 160 161 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req, 162 sizeof(union pvrdma_cmd_req)); 163 164 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp, 165 sizeof(union pvrdma_cmd_resp)); 166 167 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr, 168 sizeof(struct pvrdma_device_shared_region)); 169 170 dev->dsr_info.dsr = NULL; 171 } 172 173 static int load_dsr(PVRDMADev *dev) 174 { 175 int rc = 0; 176 PCIDevice *pci_dev = PCI_DEVICE(dev); 177 DSRInfo *dsr_info; 178 struct pvrdma_device_shared_region *dsr; 179 180 free_dsr(dev); 181 182 /* Map to DSR */ 183 dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma, 184 sizeof(struct pvrdma_device_shared_region)); 185 if (!dev->dsr_info.dsr) { 186 rdma_error_report("Failed to map to DSR"); 187 rc = -ENOMEM; 188 goto out; 189 } 190 191 /* Shortcuts */ 192 dsr_info = &dev->dsr_info; 193 dsr = dsr_info->dsr; 194 195 /* Map to command slot */ 196 dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma, 197 sizeof(union pvrdma_cmd_req)); 198 if (!dsr_info->req) { 199 rdma_error_report("Failed to map to command slot address"); 200 rc = -ENOMEM; 201 goto out_free_dsr; 202 } 203 204 /* Map to response slot */ 205 dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma, 206 sizeof(union pvrdma_cmd_resp)); 207 if (!dsr_info->rsp) { 208 rdma_error_report("Failed to map to response slot address"); 209 rc = -ENOMEM; 210 goto out_free_req; 211 } 212 213 /* Map to CQ notification ring */ 214 rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq", 215 pci_dev, dsr->cq_ring_pages.pdir_dma, 216 dsr->cq_ring_pages.num_pages); 217 if (rc) { 218 rc = -ENOMEM; 219 goto out_free_rsp; 220 } 221 222 /* Map to event notification ring */ 223 rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state, 224 "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma, 225 dsr->async_ring_pages.num_pages); 226 if (rc) { 227 rc = -ENOMEM; 228 goto out_free_rsp; 229 } 230 231 goto out; 232 233 out_free_rsp: 234 rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp)); 235 236 out_free_req: 237 rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req)); 238 239 out_free_dsr: 240 rdma_pci_dma_unmap(pci_dev, dsr_info->dsr, 241 sizeof(struct pvrdma_device_shared_region)); 242 dsr_info->dsr = NULL; 243 244 out: 245 return rc; 246 } 247 248 static void init_dsr_dev_caps(PVRDMADev *dev) 249 { 250 struct pvrdma_device_shared_region *dsr; 251 252 if (!dev->dsr_info.dsr) { 253 /* Buggy or malicious guest driver */ 254 rdma_error_report("Can't initialized DSR"); 255 return; 256 } 257 258 dsr = dev->dsr_info.dsr; 259 dsr->caps.fw_ver = PVRDMA_FW_VERSION; 260 dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE; 261 dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1; 262 dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE; 263 dsr->caps.max_mr_size = dev->dev_attr.max_mr_size; 264 dsr->caps.max_qp = dev->dev_attr.max_qp; 265 dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr; 266 dsr->caps.max_sge = dev->dev_attr.max_sge; 267 dsr->caps.max_cq = dev->dev_attr.max_cq; 268 dsr->caps.max_cqe = dev->dev_attr.max_cqe; 269 dsr->caps.max_mr = dev->dev_attr.max_mr; 270 dsr->caps.max_pd = dev->dev_attr.max_pd; 271 dsr->caps.max_ah = dev->dev_attr.max_ah; 272 dsr->caps.max_srq = dev->dev_attr.max_srq; 273 dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; 274 dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; 275 dsr->caps.gid_tbl_len = MAX_GIDS; 276 dsr->caps.sys_image_guid = 0; 277 dsr->caps.node_guid = dev->node_guid; 278 dsr->caps.phys_port_cnt = MAX_PORTS; 279 dsr->caps.max_pkeys = MAX_PKEYS; 280 } 281 282 static void uninit_msix(PCIDevice *pdev, int used_vectors) 283 { 284 PVRDMADev *dev = PVRDMA_DEV(pdev); 285 int i; 286 287 for (i = 0; i < used_vectors; i++) { 288 msix_vector_unuse(pdev, i); 289 } 290 291 msix_uninit(pdev, &dev->msix, &dev->msix); 292 } 293 294 static int init_msix(PCIDevice *pdev) 295 { 296 PVRDMADev *dev = PVRDMA_DEV(pdev); 297 int i; 298 int rc; 299 300 rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, 301 RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, 302 RDMA_MSIX_PBA, 0, NULL); 303 304 if (rc < 0) { 305 rdma_error_report("Failed to initialize MSI-X"); 306 return rc; 307 } 308 309 for (i = 0; i < RDMA_MAX_INTRS; i++) { 310 msix_vector_use(PCI_DEVICE(dev), i); 311 } 312 313 return 0; 314 } 315 316 static void pvrdma_fini(PCIDevice *pdev) 317 { 318 PVRDMADev *dev = PVRDMA_DEV(pdev); 319 320 notifier_remove(&dev->shutdown_notifier); 321 322 pvrdma_qp_ops_fini(); 323 324 rdma_backend_stop(&dev->backend_dev); 325 326 rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, 327 dev->backend_eth_device_name); 328 329 rdma_backend_fini(&dev->backend_dev); 330 331 free_dsr(dev); 332 333 if (msix_enabled(pdev)) { 334 uninit_msix(pdev, RDMA_MAX_INTRS); 335 } 336 337 rdma_info_report("Device %s %x.%x is down", pdev->name, 338 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 339 } 340 341 static void pvrdma_stop(PVRDMADev *dev) 342 { 343 rdma_backend_stop(&dev->backend_dev); 344 } 345 346 static void pvrdma_start(PVRDMADev *dev) 347 { 348 rdma_backend_start(&dev->backend_dev); 349 } 350 351 static void activate_device(PVRDMADev *dev) 352 { 353 pvrdma_start(dev); 354 set_reg_val(dev, PVRDMA_REG_ERR, 0); 355 } 356 357 static int unquiesce_device(PVRDMADev *dev) 358 { 359 return 0; 360 } 361 362 static void reset_device(PVRDMADev *dev) 363 { 364 pvrdma_stop(dev); 365 } 366 367 static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) 368 { 369 PVRDMADev *dev = opaque; 370 uint32_t val; 371 372 dev->stats.regs_reads++; 373 374 if (get_reg_val(dev, addr, &val)) { 375 rdma_error_report("Failed to read REG value from address 0x%x", 376 (uint32_t)addr); 377 return -EINVAL; 378 } 379 380 trace_pvrdma_regs_read(addr, val); 381 382 return val; 383 } 384 385 static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, 386 unsigned size) 387 { 388 PVRDMADev *dev = opaque; 389 390 dev->stats.regs_writes++; 391 392 if (set_reg_val(dev, addr, val)) { 393 rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, 394 addr, val); 395 return; 396 } 397 398 switch (addr) { 399 case PVRDMA_REG_DSRLOW: 400 trace_pvrdma_regs_write(addr, val, "DSRLOW", ""); 401 dev->dsr_info.dma = val; 402 break; 403 case PVRDMA_REG_DSRHIGH: 404 trace_pvrdma_regs_write(addr, val, "DSRHIGH", ""); 405 dev->dsr_info.dma |= val << 32; 406 load_dsr(dev); 407 init_dsr_dev_caps(dev); 408 break; 409 case PVRDMA_REG_CTL: 410 switch (val) { 411 case PVRDMA_DEVICE_CTL_ACTIVATE: 412 trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE"); 413 activate_device(dev); 414 break; 415 case PVRDMA_DEVICE_CTL_UNQUIESCE: 416 trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE"); 417 unquiesce_device(dev); 418 break; 419 case PVRDMA_DEVICE_CTL_RESET: 420 trace_pvrdma_regs_write(addr, val, "CTL", "URESET"); 421 reset_device(dev); 422 break; 423 } 424 break; 425 case PVRDMA_REG_IMR: 426 trace_pvrdma_regs_write(addr, val, "INTR_MASK", ""); 427 dev->interrupt_mask = val; 428 break; 429 case PVRDMA_REG_REQUEST: 430 if (val == 0) { 431 trace_pvrdma_regs_write(addr, val, "REQUEST", ""); 432 pvrdma_exec_cmd(dev); 433 } 434 break; 435 default: 436 break; 437 } 438 } 439 440 static const MemoryRegionOps regs_ops = { 441 .read = pvrdma_regs_read, 442 .write = pvrdma_regs_write, 443 .endianness = DEVICE_LITTLE_ENDIAN, 444 .impl = { 445 .min_access_size = sizeof(uint32_t), 446 .max_access_size = sizeof(uint32_t), 447 }, 448 }; 449 450 static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size) 451 { 452 return 0xffffffff; 453 } 454 455 static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, 456 unsigned size) 457 { 458 PVRDMADev *dev = opaque; 459 460 dev->stats.uar_writes++; 461 462 switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ 463 case PVRDMA_UAR_QP_OFFSET: 464 if (val & PVRDMA_UAR_QP_SEND) { 465 trace_pvrdma_uar_write(addr, val, "QP", "SEND", 466 val & PVRDMA_UAR_HANDLE_MASK, 0); 467 pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK); 468 } 469 if (val & PVRDMA_UAR_QP_RECV) { 470 trace_pvrdma_uar_write(addr, val, "QP", "RECV", 471 val & PVRDMA_UAR_HANDLE_MASK, 0); 472 pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); 473 } 474 break; 475 case PVRDMA_UAR_CQ_OFFSET: 476 if (val & PVRDMA_UAR_CQ_ARM) { 477 trace_pvrdma_uar_write(addr, val, "CQ", "ARM", 478 val & PVRDMA_UAR_HANDLE_MASK, 479 !!(val & PVRDMA_UAR_CQ_ARM_SOL)); 480 rdma_rm_req_notify_cq(&dev->rdma_dev_res, 481 val & PVRDMA_UAR_HANDLE_MASK, 482 !!(val & PVRDMA_UAR_CQ_ARM_SOL)); 483 } 484 if (val & PVRDMA_UAR_CQ_ARM_SOL) { 485 trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0, 486 0); 487 } 488 if (val & PVRDMA_UAR_CQ_POLL) { 489 trace_pvrdma_uar_write(addr, val, "CQ", "POLL", 490 val & PVRDMA_UAR_HANDLE_MASK, 0); 491 pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); 492 } 493 break; 494 case PVRDMA_UAR_SRQ_OFFSET: 495 if (val & PVRDMA_UAR_SRQ_RECV) { 496 trace_pvrdma_uar_write(addr, val, "QP", "SRQ", 497 val & PVRDMA_UAR_HANDLE_MASK, 0); 498 pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); 499 } 500 break; 501 default: 502 rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, 503 addr, val); 504 break; 505 } 506 } 507 508 static const MemoryRegionOps uar_ops = { 509 .read = pvrdma_uar_read, 510 .write = pvrdma_uar_write, 511 .endianness = DEVICE_LITTLE_ENDIAN, 512 .impl = { 513 .min_access_size = sizeof(uint32_t), 514 .max_access_size = sizeof(uint32_t), 515 }, 516 }; 517 518 static void init_pci_config(PCIDevice *pdev) 519 { 520 pdev->config[PCI_INTERRUPT_PIN] = 1; 521 } 522 523 static void init_bars(PCIDevice *pdev) 524 { 525 PVRDMADev *dev = PVRDMA_DEV(pdev); 526 527 /* BAR 0 - MSI-X */ 528 memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix", 529 RDMA_BAR0_MSIX_SIZE); 530 pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 531 &dev->msix); 532 533 /* BAR 1 - Registers */ 534 memset(&dev->regs_data, 0, sizeof(dev->regs_data)); 535 memory_region_init_io(&dev->regs, OBJECT(dev), ®s_ops, dev, 536 "pvrdma-regs", sizeof(dev->regs_data)); 537 pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 538 &dev->regs); 539 540 /* BAR 2 - UAR */ 541 memset(&dev->uar_data, 0, sizeof(dev->uar_data)); 542 memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar", 543 sizeof(dev->uar_data)); 544 pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 545 &dev->uar); 546 } 547 548 static void init_regs(PCIDevice *pdev) 549 { 550 PVRDMADev *dev = PVRDMA_DEV(pdev); 551 552 set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION); 553 set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); 554 } 555 556 static void init_dev_caps(PVRDMADev *dev) 557 { 558 size_t pg_tbl_bytes = TARGET_PAGE_SIZE * 559 (TARGET_PAGE_SIZE / sizeof(uint64_t)); 560 size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr), 561 sizeof(struct pvrdma_rq_wqe_hdr)); 562 563 dev->dev_attr.max_qp_wr = pg_tbl_bytes / 564 (wr_sz + sizeof(struct pvrdma_sge) * 565 dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; 566 /* First page is ring state ^^^^ */ 567 568 dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - 569 TARGET_PAGE_SIZE; /* First page is ring state */ 570 571 dev->dev_attr.max_srq_wr = pg_tbl_bytes / 572 ((sizeof(struct pvrdma_rq_wqe_hdr) + 573 sizeof(struct pvrdma_sge)) * 574 dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; 575 } 576 577 static int pvrdma_check_ram_shared(Object *obj, void *opaque) 578 { 579 bool *shared = opaque; 580 581 if (object_dynamic_cast(obj, "memory-backend-ram")) { 582 *shared = object_property_get_bool(obj, "share", NULL); 583 } 584 585 return 0; 586 } 587 588 static void pvrdma_shutdown_notifier(Notifier *n, void *opaque) 589 { 590 PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier); 591 PCIDevice *pci_dev = PCI_DEVICE(dev); 592 593 pvrdma_fini(pci_dev); 594 } 595 596 static void pvrdma_realize(PCIDevice *pdev, Error **errp) 597 { 598 int rc = 0; 599 PVRDMADev *dev = PVRDMA_DEV(pdev); 600 Object *memdev_root; 601 bool ram_shared = false; 602 PCIDevice *func0; 603 604 rdma_info_report("Initializing device %s %x.%x", pdev->name, 605 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 606 607 if (TARGET_PAGE_SIZE != qemu_real_host_page_size()) { 608 error_setg(errp, "Target page size must be the same as host page size"); 609 return; 610 } 611 612 func0 = pci_get_function_0(pdev); 613 /* Break if not vmxnet3 device in slot 0 */ 614 if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) { 615 error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), 616 TYPE_VMXNET3); 617 return; 618 } 619 dev->func0 = VMXNET3(func0); 620 621 addrconf_addr_eui48((unsigned char *)&dev->node_guid, 622 (const char *)&dev->func0->conf.macaddr.a); 623 624 memdev_root = object_resolve_path("/objects", NULL); 625 if (memdev_root) { 626 object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared); 627 } 628 if (!ram_shared) { 629 error_setg(errp, "Only shared memory backed ram is supported"); 630 return; 631 } 632 633 dev->dsr_info.dsr = NULL; 634 635 init_pci_config(pdev); 636 637 init_bars(pdev); 638 639 init_regs(pdev); 640 641 rc = init_msix(pdev); 642 if (rc) { 643 goto out; 644 } 645 646 rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, 647 dev->backend_device_name, dev->backend_port_num, 648 &dev->dev_attr, &dev->mad_chr); 649 if (rc) { 650 goto out; 651 } 652 653 init_dev_caps(dev); 654 655 rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr); 656 if (rc) { 657 goto out; 658 } 659 660 rc = pvrdma_qp_ops_init(); 661 if (rc) { 662 goto out; 663 } 664 665 memset(&dev->stats, 0, sizeof(dev->stats)); 666 667 dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; 668 qemu_register_shutdown_notifier(&dev->shutdown_notifier); 669 670 #ifdef LEGACY_RDMA_REG_MR 671 rdma_info_report("Using legacy reg_mr"); 672 #else 673 rdma_info_report("Using iova reg_mr"); 674 #endif 675 676 out: 677 if (rc) { 678 pvrdma_fini(pdev); 679 error_append_hint(errp, "Device failed to load\n"); 680 } 681 } 682 683 static void pvrdma_class_init(ObjectClass *klass, void *data) 684 { 685 DeviceClass *dc = DEVICE_CLASS(klass); 686 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 687 RdmaProviderClass *ir = RDMA_PROVIDER_CLASS(klass); 688 689 k->realize = pvrdma_realize; 690 k->vendor_id = PCI_VENDOR_ID_VMWARE; 691 k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA; 692 k->revision = 0x00; 693 k->class_id = PCI_CLASS_NETWORK_OTHER; 694 695 dc->desc = "RDMA Device"; 696 device_class_set_props(dc, pvrdma_dev_properties); 697 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 698 699 ir->format_statistics = pvrdma_format_statistics; 700 } 701 702 static const TypeInfo pvrdma_info = { 703 .name = PVRDMA_HW_NAME, 704 .parent = TYPE_PCI_DEVICE, 705 .instance_size = sizeof(PVRDMADev), 706 .class_init = pvrdma_class_init, 707 .interfaces = (InterfaceInfo[]) { 708 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 709 { INTERFACE_RDMA_PROVIDER }, 710 { } 711 } 712 }; 713 714 static void register_types(void) 715 { 716 type_register_static(&pvrdma_info); 717 } 718 719 type_init(register_types) 720