xref: /openbmc/qemu/hw/rdma/vmw/pvrdma_main.c (revision 8e6fe6b8)
1 /*
2  * QEMU paravirtual RDMA
3  *
4  * Copyright (C) 2018 Oracle
5  * Copyright (C) 2018 Red Hat Inc
6  *
7  * Authors:
8  *     Yuval Shaia <yuval.shaia@oracle.com>
9  *     Marcel Apfelbaum <marcel@redhat.com>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  *
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "qemu/module.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/pci_ids.h"
22 #include "hw/pci/msi.h"
23 #include "hw/pci/msix.h"
24 #include "hw/qdev-core.h"
25 #include "hw/qdev-properties.h"
26 #include "cpu.h"
27 #include "trace.h"
28 #include "sysemu/sysemu.h"
29 #include "monitor/monitor.h"
30 #include "hw/rdma/rdma.h"
31 
32 #include "../rdma_rm.h"
33 #include "../rdma_backend.h"
34 #include "../rdma_utils.h"
35 
36 #include <infiniband/verbs.h>
37 #include "pvrdma.h"
38 #include "standard-headers/rdma/vmw_pvrdma-abi.h"
39 #include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h"
40 #include "pvrdma_qp_ops.h"
41 
/*
 * qdev properties exposed by the pvrdma device.
 *
 * Each entry binds a property name to a field of PVRDMADev:
 *   - "netdev", "ibdev" and "ibport" are stored in backend_eth_device_name,
 *     backend_device_name and backend_port_num (default 1);
 *   - the "dev-caps-*" entries fill members of dev_attr, defaulting to the
 *     corresponding MAX_* constants;
 *   - "mad-chardev" is stored in mad_chr.
 */
static Property pvrdma_dev_properties[] = {
    DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
    DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
    DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
    DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
                       MAX_MR_SIZE),
    DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
    DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ),
    DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR),
    DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD),
    DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom,
                      MAX_QP_RD_ATOM),
    DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                      dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
    DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
    DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
    /* Terminator required by the property-list walker. */
    DEFINE_PROP_END_OF_LIST(),
};
61 
62 static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj)
63 {
64     PVRDMADev *dev = PVRDMA_DEV(obj);
65     PCIDevice *pdev = PCI_DEVICE(dev);
66 
67     monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
68                    PCI_FUNC(pdev->devfn));
69     monitor_printf(mon, "\tcommands         : %" PRId64 "\n",
70                    dev->stats.commands);
71     monitor_printf(mon, "\tregs_reads       : %" PRId64 "\n",
72                    dev->stats.regs_reads);
73     monitor_printf(mon, "\tregs_writes      : %" PRId64 "\n",
74                    dev->stats.regs_writes);
75     monitor_printf(mon, "\tuar_writes       : %" PRId64 "\n",
76                    dev->stats.uar_writes);
77     monitor_printf(mon, "\tinterrupts       : %" PRId64 "\n",
78                    dev->stats.interrupts);
79     rdma_dump_device_counters(mon, &dev->rdma_dev_res);
80 }
81 
/*
 * Release a device ring previously set up by init_dev_ring().
 *
 * @pci_dev:    device the ring-state page was DMA-mapped for
 * @ring:       ring object to free
 * @ring_state: ring-state pointer as stored by init_dev_ring()
 *
 * The ring is freed first, then TARGET_PAGE_SIZE bytes at @ring_state are
 * DMA-unmapped.
 * NOTE(review): init_dev_ring() stores the pointer after incrementing it
 * past the first ring-state entry, so the address unmapped here is not the
 * start of the mapping - confirm rdma_pci_dma_unmap() tolerates that.
 */
static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring,
                          void *ring_state)
{
    pvrdma_ring_free(ring);
    rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE);
}
88 
89 static int init_dev_ring(PvrdmaRing *ring, struct pvrdma_ring **ring_state,
90                          const char *name, PCIDevice *pci_dev,
91                          dma_addr_t dir_addr, uint32_t num_pages)
92 {
93     uint64_t *dir, *tbl;
94     int rc = 0;
95 
96     dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE);
97     if (!dir) {
98         rdma_error_report("Failed to map to page directory (ring %s)", name);
99         rc = -ENOMEM;
100         goto out;
101     }
102     tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
103     if (!tbl) {
104         rdma_error_report("Failed to map to page table (ring %s)", name);
105         rc = -ENOMEM;
106         goto out_free_dir;
107     }
108 
109     *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
110     if (!*ring_state) {
111         rdma_error_report("Failed to map to ring state (ring %s)", name);
112         rc = -ENOMEM;
113         goto out_free_tbl;
114     }
115     /* RX ring is the second */
116     (*ring_state)++;
117     rc = pvrdma_ring_init(ring, name, pci_dev,
118                           (struct pvrdma_ring *)*ring_state,
119                           (num_pages - 1) * TARGET_PAGE_SIZE /
120                           sizeof(struct pvrdma_cqne),
121                           sizeof(struct pvrdma_cqne),
122                           (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1);
123     if (rc) {
124         rc = -ENOMEM;
125         goto out_free_ring_state;
126     }
127 
128     goto out_free_tbl;
129 
130 out_free_ring_state:
131     rdma_pci_dma_unmap(pci_dev, *ring_state, TARGET_PAGE_SIZE);
132 
133 out_free_tbl:
134     rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
135 
136 out_free_dir:
137     rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
138 
139 out:
140     return rc;
141 }
142 
143 static void free_dsr(PVRDMADev *dev)
144 {
145     PCIDevice *pci_dev = PCI_DEVICE(dev);
146 
147     if (!dev->dsr_info.dsr) {
148         return;
149     }
150 
151     free_dev_ring(pci_dev, &dev->dsr_info.async,
152                   dev->dsr_info.async_ring_state);
153 
154     free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state);
155 
156     rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req,
157                          sizeof(union pvrdma_cmd_req));
158 
159     rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp,
160                          sizeof(union pvrdma_cmd_resp));
161 
162     rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr,
163                          sizeof(struct pvrdma_device_shared_region));
164 
165     dev->dsr_info.dsr = NULL;
166 }
167 
168 static int load_dsr(PVRDMADev *dev)
169 {
170     int rc = 0;
171     PCIDevice *pci_dev = PCI_DEVICE(dev);
172     DSRInfo *dsr_info;
173     struct pvrdma_device_shared_region *dsr;
174 
175     free_dsr(dev);
176 
177     /* Map to DSR */
178     dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma,
179                               sizeof(struct pvrdma_device_shared_region));
180     if (!dev->dsr_info.dsr) {
181         rdma_error_report("Failed to map to DSR");
182         rc = -ENOMEM;
183         goto out;
184     }
185 
186     /* Shortcuts */
187     dsr_info = &dev->dsr_info;
188     dsr = dsr_info->dsr;
189 
190     /* Map to command slot */
191     dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma,
192                                      sizeof(union pvrdma_cmd_req));
193     if (!dsr_info->req) {
194         rdma_error_report("Failed to map to command slot address");
195         rc = -ENOMEM;
196         goto out_free_dsr;
197     }
198 
199     /* Map to response slot */
200     dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma,
201                                      sizeof(union pvrdma_cmd_resp));
202     if (!dsr_info->rsp) {
203         rdma_error_report("Failed to map to response slot address");
204         rc = -ENOMEM;
205         goto out_free_req;
206     }
207 
208     /* Map to CQ notification ring */
209     rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq",
210                        pci_dev, dsr->cq_ring_pages.pdir_dma,
211                        dsr->cq_ring_pages.num_pages);
212     if (rc) {
213         rc = -ENOMEM;
214         goto out_free_rsp;
215     }
216 
217     /* Map to event notification ring */
218     rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state,
219                        "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma,
220                        dsr->async_ring_pages.num_pages);
221     if (rc) {
222         rc = -ENOMEM;
223         goto out_free_rsp;
224     }
225 
226     goto out;
227 
228 out_free_rsp:
229     rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp));
230 
231 out_free_req:
232     rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req));
233 
234 out_free_dsr:
235     rdma_pci_dma_unmap(pci_dev, dsr_info->dsr,
236                        sizeof(struct pvrdma_device_shared_region));
237     dsr_info->dsr = NULL;
238 
239 out:
240     return rc;
241 }
242 
243 static void init_dsr_dev_caps(PVRDMADev *dev)
244 {
245     struct pvrdma_device_shared_region *dsr;
246 
247     if (dev->dsr_info.dsr == NULL) {
248         rdma_error_report("Can't initialized DSR");
249         return;
250     }
251 
252     dsr = dev->dsr_info.dsr;
253     dsr->caps.fw_ver = PVRDMA_FW_VERSION;
254     dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE;
255     dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1;
256     dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE;
257     dsr->caps.max_mr_size = dev->dev_attr.max_mr_size;
258     dsr->caps.max_qp = dev->dev_attr.max_qp;
259     dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr;
260     dsr->caps.max_sge = dev->dev_attr.max_sge;
261     dsr->caps.max_cq = dev->dev_attr.max_cq;
262     dsr->caps.max_cqe = dev->dev_attr.max_cqe;
263     dsr->caps.max_mr = dev->dev_attr.max_mr;
264     dsr->caps.max_pd = dev->dev_attr.max_pd;
265     dsr->caps.max_ah = dev->dev_attr.max_ah;
266     dsr->caps.max_srq = dev->dev_attr.max_srq;
267     dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
268     dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
269     dsr->caps.gid_tbl_len = MAX_GIDS;
270     dsr->caps.sys_image_guid = 0;
271     dsr->caps.node_guid = dev->node_guid;
272     dsr->caps.phys_port_cnt = MAX_PORTS;
273     dsr->caps.max_pkeys = MAX_PKEYS;
274 }
275 
276 static void uninit_msix(PCIDevice *pdev, int used_vectors)
277 {
278     PVRDMADev *dev = PVRDMA_DEV(pdev);
279     int i;
280 
281     for (i = 0; i < used_vectors; i++) {
282         msix_vector_unuse(pdev, i);
283     }
284 
285     msix_uninit(pdev, &dev->msix, &dev->msix);
286 }
287 
288 static int init_msix(PCIDevice *pdev)
289 {
290     PVRDMADev *dev = PVRDMA_DEV(pdev);
291     int i;
292     int rc;
293 
294     rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
295                    RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
296                    RDMA_MSIX_PBA, 0, NULL);
297 
298     if (rc < 0) {
299         rdma_error_report("Failed to initialize MSI-X");
300         return rc;
301     }
302 
303     for (i = 0; i < RDMA_MAX_INTRS; i++) {
304         rc = msix_vector_use(PCI_DEVICE(dev), i);
305         if (rc < 0) {
306             rdma_error_report("Fail mark MSI-X vector %d", i);
307             uninit_msix(pdev, i);
308             return rc;
309         }
310     }
311 
312     return 0;
313 }
314 
/*
 * Tear the device down.
 *
 * Called from the shutdown notifier and from the error path of
 * pvrdma_realize().  The steps run in a fixed order: unregister the shutdown
 * notifier, finalise the QP ops, stop the backend, release the resource
 * manager and the backend, free the DSR mappings and, only if MSI-X is
 * currently enabled, release the MSI-X vectors.
 */
static void pvrdma_fini(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);

    notifier_remove(&dev->shutdown_notifier);

    pvrdma_qp_ops_fini();

    /* Stop backend activity before its resources are released below. */
    rdma_backend_stop(&dev->backend_dev);

    rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
                 dev->backend_eth_device_name);

    rdma_backend_fini(&dev->backend_dev);

    /* No-op when no DSR has been loaded. */
    free_dsr(dev);

    if (msix_enabled(pdev)) {
        uninit_msix(pdev, RDMA_MAX_INTRS);
    }

    rdma_info_report("Device %s %x.%x is down", pdev->name,
                     PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
339 
/* Stop the device by stopping its RDMA backend. */
static void pvrdma_stop(PVRDMADev *dev)
{
    rdma_backend_stop(&dev->backend_dev);
}
344 
/* Start the device by starting its RDMA backend. */
static void pvrdma_start(PVRDMADev *dev)
{
    rdma_backend_start(&dev->backend_dev);
}
349 
/*
 * Handle the ACTIVATE control command: start the backend and write 0 to
 * PVRDMA_REG_ERR (init_regs() initialises that register to 0xFFFF).
 */
static void activate_device(PVRDMADev *dev)
{
    pvrdma_start(dev);
    set_reg_val(dev, PVRDMA_REG_ERR, 0);
}
355 
/*
 * Handle the UNQUIESCE control command.  Currently a no-op that always
 * reports success (0); @dev is unused.
 */
static int unquiesce_device(PVRDMADev *dev)
{
    return 0;
}
360 
/* Handle the RESET control command: only stops the backend. */
static void reset_device(PVRDMADev *dev)
{
    pvrdma_stop(dev);
}
365 
366 static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size)
367 {
368     PVRDMADev *dev = opaque;
369     uint32_t val;
370 
371     dev->stats.regs_reads++;
372 
373     if (get_reg_val(dev, addr, &val)) {
374         rdma_error_report("Failed to read REG value from address 0x%x",
375                           (uint32_t)addr);
376         return -EINVAL;
377     }
378 
379     trace_pvrdma_regs_read(addr, val);
380 
381     return val;
382 }
383 
384 static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val,
385                               unsigned size)
386 {
387     PVRDMADev *dev = opaque;
388 
389     dev->stats.regs_writes++;
390 
391     if (set_reg_val(dev, addr, val)) {
392         rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64,
393                           addr, val);
394         return;
395     }
396 
397     switch (addr) {
398     case PVRDMA_REG_DSRLOW:
399         trace_pvrdma_regs_write(addr, val, "DSRLOW", "");
400         dev->dsr_info.dma = val;
401         break;
402     case PVRDMA_REG_DSRHIGH:
403         trace_pvrdma_regs_write(addr, val, "DSRHIGH", "");
404         dev->dsr_info.dma |= val << 32;
405         load_dsr(dev);
406         init_dsr_dev_caps(dev);
407         break;
408     case PVRDMA_REG_CTL:
409         switch (val) {
410         case PVRDMA_DEVICE_CTL_ACTIVATE:
411             trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE");
412             activate_device(dev);
413             break;
414         case PVRDMA_DEVICE_CTL_UNQUIESCE:
415             trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE");
416             unquiesce_device(dev);
417             break;
418         case PVRDMA_DEVICE_CTL_RESET:
419             trace_pvrdma_regs_write(addr, val, "CTL", "URESET");
420             reset_device(dev);
421             break;
422         }
423         break;
424     case PVRDMA_REG_IMR:
425         trace_pvrdma_regs_write(addr, val, "INTR_MASK", "");
426         dev->interrupt_mask = val;
427         break;
428     case PVRDMA_REG_REQUEST:
429         if (val == 0) {
430             trace_pvrdma_regs_write(addr, val, "REQUEST", "");
431             pvrdma_exec_cmd(dev);
432         }
433         break;
434     default:
435         break;
436     }
437 }
438 
/*
 * Memory region callbacks for the register BAR: little-endian, with the
 * implementation handling exactly 32-bit accesses.
 */
static const MemoryRegionOps regs_ops = {
    .read = pvrdma_regs_read,
    .write = pvrdma_regs_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = sizeof(uint32_t),
        .max_access_size = sizeof(uint32_t),
    },
};
448 
/*
 * MMIO read handler for the UAR BAR.  Every read returns the constant
 * 0xffffffff, whatever the address or size; all parameters are unused.
 */
static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0xffffffff;
}
453 
454 static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
455                              unsigned size)
456 {
457     PVRDMADev *dev = opaque;
458 
459     dev->stats.uar_writes++;
460 
461     switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */
462     case PVRDMA_UAR_QP_OFFSET:
463         if (val & PVRDMA_UAR_QP_SEND) {
464             trace_pvrdma_uar_write(addr, val, "QP", "SEND",
465                                    val & PVRDMA_UAR_HANDLE_MASK, 0);
466             pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK);
467         }
468         if (val & PVRDMA_UAR_QP_RECV) {
469             trace_pvrdma_uar_write(addr, val, "QP", "RECV",
470                                    val & PVRDMA_UAR_HANDLE_MASK, 0);
471             pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
472         }
473         break;
474     case PVRDMA_UAR_CQ_OFFSET:
475         if (val & PVRDMA_UAR_CQ_ARM) {
476             trace_pvrdma_uar_write(addr, val, "CQ", "ARM",
477                                    val & PVRDMA_UAR_HANDLE_MASK,
478                                    !!(val & PVRDMA_UAR_CQ_ARM_SOL));
479             rdma_rm_req_notify_cq(&dev->rdma_dev_res,
480                                   val & PVRDMA_UAR_HANDLE_MASK,
481                                   !!(val & PVRDMA_UAR_CQ_ARM_SOL));
482         }
483         if (val & PVRDMA_UAR_CQ_ARM_SOL) {
484             trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0,
485                                    0);
486         }
487         if (val & PVRDMA_UAR_CQ_POLL) {
488             trace_pvrdma_uar_write(addr, val, "CQ", "POLL",
489                                    val & PVRDMA_UAR_HANDLE_MASK, 0);
490             pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
491         }
492         break;
493     case PVRDMA_UAR_SRQ_OFFSET:
494         if (val & PVRDMA_UAR_SRQ_RECV) {
495             trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
496                                    val & PVRDMA_UAR_HANDLE_MASK, 0);
497             pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
498         }
499         break;
500     default:
501         rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
502                           addr, val);
503         break;
504     }
505 }
506 
/*
 * Memory region callbacks for the UAR BAR: little-endian, with the
 * implementation handling exactly 32-bit accesses.
 */
static const MemoryRegionOps uar_ops = {
    .read = pvrdma_uar_read,
    .write = pvrdma_uar_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = sizeof(uint32_t),
        .max_access_size = sizeof(uint32_t),
    },
};
516 
/* Set the PCI_INTERRUPT_PIN byte of the device's config space to 1. */
static void init_pci_config(PCIDevice *pdev)
{
    pdev->config[PCI_INTERRUPT_PIN] = 1;
}
521 
522 static void init_bars(PCIDevice *pdev)
523 {
524     PVRDMADev *dev = PVRDMA_DEV(pdev);
525 
526     /* BAR 0 - MSI-X */
527     memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix",
528                        RDMA_BAR0_MSIX_SIZE);
529     pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
530                      &dev->msix);
531 
532     /* BAR 1 - Registers */
533     memset(&dev->regs_data, 0, sizeof(dev->regs_data));
534     memory_region_init_io(&dev->regs, OBJECT(dev), &regs_ops, dev,
535                           "pvrdma-regs", sizeof(dev->regs_data));
536     pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
537                      &dev->regs);
538 
539     /* BAR 2 - UAR */
540     memset(&dev->uar_data, 0, sizeof(dev->uar_data));
541     memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar",
542                           sizeof(dev->uar_data));
543     pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
544                      &dev->uar);
545 }
546 
/*
 * Give the device registers their initial values: the version register is
 * set to PVRDMA_HW_VERSION and the error register to 0xFFFF (it is written
 * with 0 by activate_device()).
 */
static void init_regs(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);

    set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION);
    set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
}
554 
555 static void init_dev_caps(PVRDMADev *dev)
556 {
557     size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
558                           (TARGET_PAGE_SIZE / sizeof(uint64_t));
559     size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr),
560                        sizeof(struct pvrdma_rq_wqe_hdr));
561 
562     dev->dev_attr.max_qp_wr = pg_tbl_bytes /
563                               (wr_sz + sizeof(struct pvrdma_sge) *
564                               dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
565                               /* First page is ring state  ^^^^ */
566 
567     dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
568                             TARGET_PAGE_SIZE; /* First page is ring state */
569 
570     dev->dev_attr.max_srq_wr = pg_tbl_bytes /
571                                 ((sizeof(struct pvrdma_rq_wqe_hdr) +
572                                 sizeof(struct pvrdma_sge)) *
573                                 dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
574 }
575 
576 static int pvrdma_check_ram_shared(Object *obj, void *opaque)
577 {
578     bool *shared = opaque;
579 
580     if (object_dynamic_cast(obj, "memory-backend-ram")) {
581         *shared = object_property_get_bool(obj, "share", NULL);
582     }
583 
584     return 0;
585 }
586 
587 static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
588 {
589     PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
590     PCIDevice *pci_dev = PCI_DEVICE(dev);
591 
592     pvrdma_fini(pci_dev);
593 }
594 
595 static void pvrdma_realize(PCIDevice *pdev, Error **errp)
596 {
597     int rc = 0;
598     PVRDMADev *dev = PVRDMA_DEV(pdev);
599     Object *memdev_root;
600     bool ram_shared = false;
601     PCIDevice *func0;
602 
603     rdma_info_report("Initializing device %s %x.%x", pdev->name,
604                      PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
605 
606     if (TARGET_PAGE_SIZE != getpagesize()) {
607         error_setg(errp, "Target page size must be the same as host page size");
608         return;
609     }
610 
611     func0 = pci_get_function_0(pdev);
612     /* Break if not vmxnet3 device in slot 0 */
613     if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) {
614         error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn),
615                    TYPE_VMXNET3);
616         return;
617     }
618     dev->func0 = VMXNET3(func0);
619 
620     addrconf_addr_eui48((unsigned char *)&dev->node_guid,
621                         (const char *)&dev->func0->conf.macaddr.a);
622 
623     memdev_root = object_resolve_path("/objects", NULL);
624     if (memdev_root) {
625         object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared);
626     }
627     if (!ram_shared) {
628         error_setg(errp, "Only shared memory backed ram is supported");
629         return;
630     }
631 
632     dev->dsr_info.dsr = NULL;
633 
634     init_pci_config(pdev);
635 
636     init_bars(pdev);
637 
638     init_regs(pdev);
639 
640     rc = init_msix(pdev);
641     if (rc) {
642         goto out;
643     }
644 
645     rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
646                            dev->backend_device_name, dev->backend_port_num,
647                            &dev->dev_attr, &dev->mad_chr);
648     if (rc) {
649         goto out;
650     }
651 
652     init_dev_caps(dev);
653 
654     rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr);
655     if (rc) {
656         goto out;
657     }
658 
659     rc = pvrdma_qp_ops_init();
660     if (rc) {
661         goto out;
662     }
663 
664     memset(&dev->stats, 0, sizeof(dev->stats));
665 
666     dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
667     qemu_register_shutdown_notifier(&dev->shutdown_notifier);
668 
669 out:
670     if (rc) {
671         pvrdma_fini(pdev);
672         error_append_hint(errp, "Device failed to load\n");
673     }
674 }
675 
676 static void pvrdma_class_init(ObjectClass *klass, void *data)
677 {
678     DeviceClass *dc = DEVICE_CLASS(klass);
679     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
680     RdmaProviderClass *ir = INTERFACE_RDMA_PROVIDER_CLASS(klass);
681 
682     k->realize = pvrdma_realize;
683     k->vendor_id = PCI_VENDOR_ID_VMWARE;
684     k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA;
685     k->revision = 0x00;
686     k->class_id = PCI_CLASS_NETWORK_OTHER;
687 
688     dc->desc = "RDMA Device";
689     dc->props = pvrdma_dev_properties;
690     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
691 
692     ir->print_statistics = pvrdma_print_statistics;
693 }
694 
/*
 * QOM type description of the pvrdma device: a PCI device named
 * PVRDMA_HW_NAME whose instances are PVRDMADev objects and which implements
 * the conventional-PCI and RDMA-provider interfaces.
 */
static const TypeInfo pvrdma_info = {
    .name = PVRDMA_HW_NAME,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(PVRDMADev),
    .class_init = pvrdma_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { INTERFACE_RDMA_PROVIDER },
        { }  /* list terminator */
    }
};
706 
/* Register the pvrdma device type with the QOM type system. */
static void register_types(void)
{
    type_register_static(&pvrdma_info);
}

/* Hook register_types() into QEMU's type initialisation. */
type_init(register_types)
713