xref: /openbmc/qemu/hw/remote/vfio-user-obj.c (revision 1a648f7ae4d3ac97ef0855baec46047ea21a400a)
18f9a9259SJagannathan Raman /**
28f9a9259SJagannathan Raman  * QEMU vfio-user-server server object
38f9a9259SJagannathan Raman  *
48f9a9259SJagannathan Raman  * Copyright © 2022 Oracle and/or its affiliates.
58f9a9259SJagannathan Raman  *
68f9a9259SJagannathan Raman  * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
78f9a9259SJagannathan Raman  *
88f9a9259SJagannathan Raman  * See the COPYING file in the top-level directory.
98f9a9259SJagannathan Raman  *
108f9a9259SJagannathan Raman  */
118f9a9259SJagannathan Raman 
128f9a9259SJagannathan Raman /**
138f9a9259SJagannathan Raman  * Usage: add options:
148f9a9259SJagannathan Raman  *     -machine x-remote,vfio-user=on,auto-shutdown=on
158f9a9259SJagannathan Raman  *     -device <PCI-device>,id=<pci-dev-id>
168f9a9259SJagannathan Raman  *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
178f9a9259SJagannathan Raman  *             device=<pci-dev-id>
188f9a9259SJagannathan Raman  *
198f9a9259SJagannathan Raman  * Note that x-vfio-user-server object must be used with x-remote machine only.
 * This server supports only PCI devices for now.
218f9a9259SJagannathan Raman  *
228f9a9259SJagannathan Raman  * type - SocketAddress type - presently "unix" alone is supported. Required
238f9a9259SJagannathan Raman  *        option
248f9a9259SJagannathan Raman  *
258f9a9259SJagannathan Raman  * path - named unix socket, it will be created by the server. It is
268f9a9259SJagannathan Raman  *        a required option
278f9a9259SJagannathan Raman  *
288f9a9259SJagannathan Raman  * device - id of a device on the server, a required option. PCI devices
298f9a9259SJagannathan Raman  *          alone are supported presently.
309fb3fba1SJagannathan Raman  *
319fb3fba1SJagannathan Raman  * notes - x-vfio-user-server could block IO and monitor during the
329fb3fba1SJagannathan Raman  *         initialization phase.
33c1454758SJagannathan Raman  *
34c1454758SJagannathan Raman  *         When x-remote machine has the auto-shutdown property
35c1454758SJagannathan Raman  *         enabled (default), x-vfio-user-server terminates after the last
36c1454758SJagannathan Raman  *         client disconnects. Otherwise, it will continue running until
37c1454758SJagannathan Raman  *         explicitly killed.
388f9a9259SJagannathan Raman  */
398f9a9259SJagannathan Raman 
408f9a9259SJagannathan Raman #include "qemu/osdep.h"
418f9a9259SJagannathan Raman 
428f9a9259SJagannathan Raman #include "qom/object.h"
438f9a9259SJagannathan Raman #include "qom/object_interfaces.h"
448f9a9259SJagannathan Raman #include "qemu/error-report.h"
458f9a9259SJagannathan Raman #include "trace.h"
468f9a9259SJagannathan Raman #include "sysemu/runstate.h"
478f9a9259SJagannathan Raman #include "hw/boards.h"
488f9a9259SJagannathan Raman #include "hw/remote/machine.h"
498f9a9259SJagannathan Raman #include "qapi/error.h"
508f9a9259SJagannathan Raman #include "qapi/qapi-visit-sockets.h"
519fb3fba1SJagannathan Raman #include "qapi/qapi-events-misc.h"
5287f7249fSJagannathan Raman #include "qemu/notify.h"
539fb3fba1SJagannathan Raman #include "qemu/thread.h"
5490072f29SJagannathan Raman #include "qemu/main-loop.h"
5587f7249fSJagannathan Raman #include "sysemu/sysemu.h"
5687f7249fSJagannathan Raman #include "libvfio-user.h"
57a6e8d6d9SJagannathan Raman #include "hw/qdev-core.h"
58a6e8d6d9SJagannathan Raman #include "hw/pci/pci.h"
599fb3fba1SJagannathan Raman #include "qemu/timer.h"
603123f93dSJagannathan Raman #include "exec/memory.h"
6108cf3dc6SJagannathan Raman #include "hw/pci/msi.h"
6208cf3dc6SJagannathan Raman #include "hw/pci/msix.h"
6308cf3dc6SJagannathan Raman #include "hw/remote/vfio-user-obj.h"
648f9a9259SJagannathan Raman 
658f9a9259SJagannathan Raman #define TYPE_VFU_OBJECT "x-vfio-user-server"
668f9a9259SJagannathan Raman OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
678f9a9259SJagannathan Raman 
/**
 * VFU_OBJECT_ERROR - reports an error message.
 *
 * If auto_shutdown is set, it aborts the machine on error. Otherwise,
 * it logs an error message without aborting. auto_shutdown is disabled
 * when the server serves clients from multiple VMs; as such, an error
 * from one VM shouldn't be able to disrupt other VM's services.
 *
 * @o is accepted for call-site symmetry but is not used by the expansion.
 * @fmt and the variadic arguments follow printf conventions.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely as the unbraced arm of an if/else.
 * Callers must terminate the invocation with a semicolon.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    do {                                                                  \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    } while (0)
848f9a9259SJagannathan Raman 
858f9a9259SJagannathan Raman struct VfuObjectClass {
868f9a9259SJagannathan Raman     ObjectClass parent_class;
878f9a9259SJagannathan Raman 
888f9a9259SJagannathan Raman     unsigned int nr_devs;
898f9a9259SJagannathan Raman };
908f9a9259SJagannathan Raman 
918f9a9259SJagannathan Raman struct VfuObject {
928f9a9259SJagannathan Raman     /* private */
938f9a9259SJagannathan Raman     Object parent;
948f9a9259SJagannathan Raman 
958f9a9259SJagannathan Raman     SocketAddress *socket;
968f9a9259SJagannathan Raman 
978f9a9259SJagannathan Raman     char *device;
988f9a9259SJagannathan Raman 
998f9a9259SJagannathan Raman     Error *err;
10087f7249fSJagannathan Raman 
10187f7249fSJagannathan Raman     Notifier machine_done;
10287f7249fSJagannathan Raman 
10387f7249fSJagannathan Raman     vfu_ctx_t *vfu_ctx;
104a6e8d6d9SJagannathan Raman 
105a6e8d6d9SJagannathan Raman     PCIDevice *pci_dev;
106a6e8d6d9SJagannathan Raman 
107a6e8d6d9SJagannathan Raman     Error *unplug_blocker;
1089fb3fba1SJagannathan Raman 
1099fb3fba1SJagannathan Raman     int vfu_poll_fd;
11008cf3dc6SJagannathan Raman 
11108cf3dc6SJagannathan Raman     MSITriggerFunc *default_msi_trigger;
11208cf3dc6SJagannathan Raman     MSIPrepareMessageFunc *default_msi_prepare_message;
11308cf3dc6SJagannathan Raman     MSIxPrepareMessageFunc *default_msix_prepare_message;
1148f9a9259SJagannathan Raman };
1158f9a9259SJagannathan Raman 
11687f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp);
11787f7249fSJagannathan Raman 
vfu_object_auto_shutdown(void)1188f9a9259SJagannathan Raman static bool vfu_object_auto_shutdown(void)
1198f9a9259SJagannathan Raman {
1208f9a9259SJagannathan Raman     bool auto_shutdown = true;
1218f9a9259SJagannathan Raman     Error *local_err = NULL;
1228f9a9259SJagannathan Raman 
1238f9a9259SJagannathan Raman     if (!current_machine) {
1248f9a9259SJagannathan Raman         return auto_shutdown;
1258f9a9259SJagannathan Raman     }
1268f9a9259SJagannathan Raman 
1278f9a9259SJagannathan Raman     auto_shutdown = object_property_get_bool(OBJECT(current_machine),
1288f9a9259SJagannathan Raman                                              "auto-shutdown",
1298f9a9259SJagannathan Raman                                              &local_err);
1308f9a9259SJagannathan Raman 
1318f9a9259SJagannathan Raman     /*
1328f9a9259SJagannathan Raman      * local_err would be set if no such property exists - safe to ignore.
1338f9a9259SJagannathan Raman      * Unlikely scenario as auto-shutdown is always defined for
1348f9a9259SJagannathan Raman      * TYPE_REMOTE_MACHINE, and  TYPE_VFU_OBJECT only works with
1358f9a9259SJagannathan Raman      * TYPE_REMOTE_MACHINE
1368f9a9259SJagannathan Raman      */
1378f9a9259SJagannathan Raman     if (local_err) {
1388f9a9259SJagannathan Raman         auto_shutdown = true;
1398f9a9259SJagannathan Raman         error_free(local_err);
1408f9a9259SJagannathan Raman     }
1418f9a9259SJagannathan Raman 
1428f9a9259SJagannathan Raman     return auto_shutdown;
1438f9a9259SJagannathan Raman }
1448f9a9259SJagannathan Raman 
vfu_object_set_socket(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)1458f9a9259SJagannathan Raman static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
1468f9a9259SJagannathan Raman                                   void *opaque, Error **errp)
1478f9a9259SJagannathan Raman {
1488f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
1498f9a9259SJagannathan Raman 
15087f7249fSJagannathan Raman     if (o->vfu_ctx) {
15187f7249fSJagannathan Raman         error_setg(errp, "vfu: Unable to set socket property - server busy");
15287f7249fSJagannathan Raman         return;
15387f7249fSJagannathan Raman     }
15487f7249fSJagannathan Raman 
1558f9a9259SJagannathan Raman     qapi_free_SocketAddress(o->socket);
1568f9a9259SJagannathan Raman 
1578f9a9259SJagannathan Raman     o->socket = NULL;
1588f9a9259SJagannathan Raman 
1598f9a9259SJagannathan Raman     visit_type_SocketAddress(v, name, &o->socket, errp);
1608f9a9259SJagannathan Raman 
1618f9a9259SJagannathan Raman     if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
1628f9a9259SJagannathan Raman         error_setg(errp, "vfu: Unsupported socket type - %s",
1638f9a9259SJagannathan Raman                    SocketAddressType_str(o->socket->type));
1648f9a9259SJagannathan Raman         qapi_free_SocketAddress(o->socket);
1658f9a9259SJagannathan Raman         o->socket = NULL;
1668f9a9259SJagannathan Raman         return;
1678f9a9259SJagannathan Raman     }
1688f9a9259SJagannathan Raman 
1698f9a9259SJagannathan Raman     trace_vfu_prop("socket", o->socket->u.q_unix.path);
17087f7249fSJagannathan Raman 
17187f7249fSJagannathan Raman     vfu_object_init_ctx(o, errp);
1728f9a9259SJagannathan Raman }
1738f9a9259SJagannathan Raman 
vfu_object_set_device(Object * obj,const char * str,Error ** errp)1748f9a9259SJagannathan Raman static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
1758f9a9259SJagannathan Raman {
1768f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
1778f9a9259SJagannathan Raman 
17887f7249fSJagannathan Raman     if (o->vfu_ctx) {
17987f7249fSJagannathan Raman         error_setg(errp, "vfu: Unable to set device property - server busy");
18087f7249fSJagannathan Raman         return;
18187f7249fSJagannathan Raman     }
18287f7249fSJagannathan Raman 
1838f9a9259SJagannathan Raman     g_free(o->device);
1848f9a9259SJagannathan Raman 
1858f9a9259SJagannathan Raman     o->device = g_strdup(str);
1868f9a9259SJagannathan Raman 
1878f9a9259SJagannathan Raman     trace_vfu_prop("device", str);
18887f7249fSJagannathan Raman 
18987f7249fSJagannathan Raman     vfu_object_init_ctx(o, errp);
19087f7249fSJagannathan Raman }
19187f7249fSJagannathan Raman 
vfu_object_ctx_run(void * opaque)1929fb3fba1SJagannathan Raman static void vfu_object_ctx_run(void *opaque)
1939fb3fba1SJagannathan Raman {
1949fb3fba1SJagannathan Raman     VfuObject *o = opaque;
1959fb3fba1SJagannathan Raman     const char *vfu_id;
1969fb3fba1SJagannathan Raman     char *vfu_path, *pci_dev_path;
1979fb3fba1SJagannathan Raman     int ret = -1;
1989fb3fba1SJagannathan Raman 
1999fb3fba1SJagannathan Raman     while (ret != 0) {
2009fb3fba1SJagannathan Raman         ret = vfu_run_ctx(o->vfu_ctx);
2019fb3fba1SJagannathan Raman         if (ret < 0) {
2029fb3fba1SJagannathan Raman             if (errno == EINTR) {
2039fb3fba1SJagannathan Raman                 continue;
2049fb3fba1SJagannathan Raman             } else if (errno == ENOTCONN) {
2059fb3fba1SJagannathan Raman                 vfu_id = object_get_canonical_path_component(OBJECT(o));
2069fb3fba1SJagannathan Raman                 vfu_path = object_get_canonical_path(OBJECT(o));
2079fb3fba1SJagannathan Raman                 g_assert(o->pci_dev);
2089fb3fba1SJagannathan Raman                 pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
2099fb3fba1SJagannathan Raman                  /* o->device is a required property and is non-NULL here */
2109fb3fba1SJagannathan Raman                 g_assert(o->device);
2119fb3fba1SJagannathan Raman                 qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
2129fb3fba1SJagannathan Raman                                                   o->device, pci_dev_path);
2139fb3fba1SJagannathan Raman                 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2149fb3fba1SJagannathan Raman                 o->vfu_poll_fd = -1;
2159fb3fba1SJagannathan Raman                 object_unparent(OBJECT(o));
2169fb3fba1SJagannathan Raman                 g_free(vfu_path);
2179fb3fba1SJagannathan Raman                 g_free(pci_dev_path);
2189fb3fba1SJagannathan Raman                 break;
2199fb3fba1SJagannathan Raman             } else {
2209fb3fba1SJagannathan Raman                 VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
2219fb3fba1SJagannathan Raman                                  o->device, strerror(errno));
2229fb3fba1SJagannathan Raman                 break;
2239fb3fba1SJagannathan Raman             }
2249fb3fba1SJagannathan Raman         }
2259fb3fba1SJagannathan Raman     }
2269fb3fba1SJagannathan Raman }
2279fb3fba1SJagannathan Raman 
vfu_object_attach_ctx(void * opaque)2289fb3fba1SJagannathan Raman static void vfu_object_attach_ctx(void *opaque)
2299fb3fba1SJagannathan Raman {
2309fb3fba1SJagannathan Raman     VfuObject *o = opaque;
2319fb3fba1SJagannathan Raman     GPollFD pfds[1];
2329fb3fba1SJagannathan Raman     int ret;
2339fb3fba1SJagannathan Raman 
2349fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2359fb3fba1SJagannathan Raman 
2369fb3fba1SJagannathan Raman     pfds[0].fd = o->vfu_poll_fd;
2379fb3fba1SJagannathan Raman     pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
2389fb3fba1SJagannathan Raman 
2399fb3fba1SJagannathan Raman retry_attach:
2409fb3fba1SJagannathan Raman     ret = vfu_attach_ctx(o->vfu_ctx);
2419fb3fba1SJagannathan Raman     if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
2429fb3fba1SJagannathan Raman         /**
2439fb3fba1SJagannathan Raman          * vfu_object_attach_ctx can block QEMU's main loop
2449fb3fba1SJagannathan Raman          * during attach - the monitor and other IO
2459fb3fba1SJagannathan Raman          * could be unresponsive during this time.
2469fb3fba1SJagannathan Raman          */
2479fb3fba1SJagannathan Raman         (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
2489fb3fba1SJagannathan Raman         goto retry_attach;
2499fb3fba1SJagannathan Raman     } else if (ret < 0) {
2509fb3fba1SJagannathan Raman         VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
2519fb3fba1SJagannathan Raman                          o->device, strerror(errno));
2529fb3fba1SJagannathan Raman         return;
2539fb3fba1SJagannathan Raman     }
2549fb3fba1SJagannathan Raman 
2559fb3fba1SJagannathan Raman     o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
2569fb3fba1SJagannathan Raman     if (o->vfu_poll_fd < 0) {
2579fb3fba1SJagannathan Raman         VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
2589fb3fba1SJagannathan Raman         return;
2599fb3fba1SJagannathan Raman     }
2609fb3fba1SJagannathan Raman 
2619fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
2629fb3fba1SJagannathan Raman }
2639fb3fba1SJagannathan Raman 
/**
 * vfu_object_cfg_access - PCI config space access callback.
 *
 * Splits the request into accesses of at most four bytes and forwards
 * each one to the PCI core. Data in @buf is little-endian.
 *
 * Returns @count.
 */
static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
                                     size_t count, loff_t offset,
                                     const bool is_write)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    const uint32_t max_width = sizeof(uint32_t);
    char *cursor = buf;
    size_t remaining;

    /*
     * Writes to the BAR registers would trigger an update to the
     * global Memory and IO AddressSpaces. But the remote device
     * never uses the global AddressSpaces, therefore overlapping
     * memory regions are not a problem
     */
    for (remaining = count; remaining > 0; ) {
        uint32_t val;
        int len;

        if (remaining > max_width) {
            len = max_width;
        } else {
            len = remaining;
        }

        if (!is_write) {
            val = pci_host_config_read_common(o->pci_dev, offset,
                                              pci_config_size(o->pci_dev), len);
            stn_le_p(cursor, len, val);
            trace_vfu_cfg_read(offset, val);
        } else {
            val = ldn_le_p(cursor, len);
            pci_host_config_write_common(o->pci_dev, offset,
                                         pci_config_size(o->pci_dev),
                                         val, len);
            trace_vfu_cfg_write(offset, val);
        }

        remaining -= len;
        cursor += len;
        offset += len;
    }

    return count;
}
30290072f29SJagannathan Raman 
/**
 * dma_register - DMA region registration callback.
 *
 * Wraps the host mapping described by @info in a newly allocated RAM
 * MemoryRegion and adds it, at the region's IOVA, to the root of the
 * PCI device's DMA address space. Regions without a host virtual
 * address are ignored.
 */
static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    g_autofree char *mr_name = NULL;
    MemoryRegion *ram_mr;
    AddressSpace *as;

    /* NOTE(review): presumably a region the server could not map - confirm */
    if (info->vaddr == NULL) {
        return;
    }

    mr_name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                              (uint64_t)info->vaddr);

    ram_mr = g_new0(MemoryRegion, 1);
    memory_region_init_ram_ptr(ram_mr, NULL, mr_name,
                               info->iova.iov_len, info->vaddr);

    as = pci_device_iommu_address_space(o->pci_dev);
    memory_region_add_subregion(as->root, (hwaddr)info->iova.iov_base,
                                ram_mr);

    trace_vfu_dma_register((uint64_t)info->iova.iov_base,
                           info->iova.iov_len);
}
32915ccf9beSJagannathan Raman 
/**
 * dma_unregister - DMA region removal callback.
 *
 * Looks up the MemoryRegion backing the host address in @info, removes
 * it from the root of the PCI device's DMA address space and unparents
 * it. Does nothing when no MemoryRegion backs that address.
 */
static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    ram_addr_t unused_offset;
    MemoryRegion *ram_mr = memory_region_from_host(info->vaddr,
                                                   &unused_offset);
    AddressSpace *as;

    if (ram_mr == NULL) {
        return;
    }

    as = pci_device_iommu_address_space(o->pci_dev);
    memory_region_del_subregion(as->root, ram_mr);
    object_unparent(OBJECT(ram_mr));

    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
}
35015ccf9beSJagannathan Raman 
/**
 * vfu_object_mr_rw - read from or write to a single MemoryRegion.
 *
 * @mr: region to access
 * @buf: source (write) or destination (read) buffer
 * @offset: offset within @mr
 * @size: number of bytes to transfer
 * @is_write: true for a write, false for a read
 *
 * Directly accessible regions are copied with memcpy(); everything else
 * is dispatched through the region's access handlers, one access at a
 * time.
 *
 * Returns 0 on success, -1 if a dispatched access did not return MEMTX_OK.
 */
static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *cursor = buf;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() would already check
         * for buffer overflow, we don't need to repeat it here.
         */
        uint8_t *ram = memory_region_get_ram_ptr(mr);
        uint8_t *target = ram + offset;

        if (is_write) {
            memcpy(target, buf, size);
        } else {
            memcpy(buf, target, size);
        }

        return 0;
    }

    while (size > 0) {
        MemTxResult res;
        uint64_t data;
        bool locked;
        int width;

        /**
         * The read/write logic used below is similar to the ones in
         * flatview_read/write_continue()
         */
        locked = prepare_mmio_access(mr);
        width = memory_access_size(mr, size, offset);

        if (is_write) {
            data = ldn_he_p(cursor, width);
            res = memory_region_dispatch_write(mr, offset, data,
                                               size_memop(width),
                                               MEMTXATTRS_UNSPECIFIED);
        } else {
            res = memory_region_dispatch_read(mr, offset, &data,
                                              size_memop(width),
                                              MEMTXATTRS_UNSPECIFIED);
            stn_he_p(cursor, width, data);
        }

        /* Drop the lock before bailing out on a failed access */
        if (locked) {
            bql_unlock();
        }

        if (res != MEMTX_OK) {
            return -1;
        }

        offset += width;
        cursor += width;
        size -= width;
    }

    return 0;
}
4183123f93dSJagannathan Raman 
/**
 * vfu_object_bar_rw - read from or write to a PCI BAR.
 *
 * @pci_dev: device owning the BAR
 * @pci_bar: index into pci_dev->io_regions
 * @bar_offset: offset of the access within the BAR
 * @buf: source (write) or destination (read) buffer
 * @len: number of bytes requested
 * @is_write: true for a write, false for a read
 *
 * The BAR is walked section by section with memory_region_find(); each
 * section is transferred with vfu_object_mr_rw().
 *
 * Returns the number of bytes transferred before the request completed
 * or the first failure (unmapped address, write to a read-only region,
 * or failed access).
 */
static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
                                hwaddr bar_offset, char * const buf,
                                hwaddr len, const bool is_write)
{
    uint8_t *cursor = (uint8_t *)buf;
    hwaddr done = 0;

    while (len > 0) {
        MemoryRegionSection sec;
        uint64_t chunk;
        bool ok = true;

        sec = memory_region_find(pci_dev->io_regions[pci_bar].memory,
                                 bar_offset, len);
        if (sec.mr == NULL) {
            warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
            return done;
        }

        chunk = int128_get64(sec.size);

        if (is_write && sec.mr->readonly) {
            warn_report("vfu: attempting to write to readonly region in "
                        "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
                        pci_bar, bar_offset,
                        (bar_offset + chunk));
            ok = false;
        } else if (vfu_object_mr_rw(sec.mr, cursor, sec.offset_within_region,
                                    chunk, is_write)) {
            warn_report("vfu: failed to %s "
                        "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
                        is_write ? "write to" : "read from", bar_offset,
                        (bar_offset + chunk), pci_bar);
            ok = false;
        }

        /* memory_region_find() took a reference on the region; drop it */
        memory_region_unref(sec.mr);

        if (!ok) {
            return done;
        }

        done += chunk;
        bar_offset += chunk;
        cursor += chunk;
        len -= chunk;
    }

    return done;
}
4723123f93dSJagannathan Raman 
4733123f93dSJagannathan Raman /**
4743123f93dSJagannathan Raman  * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
4753123f93dSJagannathan Raman  *
4763123f93dSJagannathan Raman  * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
4773123f93dSJagannathan Raman  * define vfu_object_bar2_handler
4783123f93dSJagannathan Raman  */
4793123f93dSJagannathan Raman #define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                         \
4803123f93dSJagannathan Raman     static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,        \
4813123f93dSJagannathan Raman                                         char * const buf, size_t count,        \
4823123f93dSJagannathan Raman                                         loff_t offset, const bool is_write)    \
4833123f93dSJagannathan Raman     {                                                                          \
4843123f93dSJagannathan Raman         VfuObject *o = vfu_get_private(vfu_ctx);                               \
4853123f93dSJagannathan Raman         PCIDevice *pci_dev = o->pci_dev;                                       \
4863123f93dSJagannathan Raman                                                                                \
4873123f93dSJagannathan Raman         return vfu_object_bar_rw(pci_dev, BAR_NO, offset,                      \
4883123f93dSJagannathan Raman                                  buf, count, is_write);                        \
4893123f93dSJagannathan Raman     }                                                                          \
4903123f93dSJagannathan Raman 
4913123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(0)
4923123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(1)
4933123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(2)
4943123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(3)
4953123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(4)
4963123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(5)
4973123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(6)
4983123f93dSJagannathan Raman 
4993123f93dSJagannathan Raman static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
5003123f93dSJagannathan Raman     &vfu_object_bar0_handler,
5013123f93dSJagannathan Raman     &vfu_object_bar1_handler,
5023123f93dSJagannathan Raman     &vfu_object_bar2_handler,
5033123f93dSJagannathan Raman     &vfu_object_bar3_handler,
5043123f93dSJagannathan Raman     &vfu_object_bar4_handler,
5053123f93dSJagannathan Raman     &vfu_object_bar5_handler,
5063123f93dSJagannathan Raman     &vfu_object_bar6_handler,
5073123f93dSJagannathan Raman };
5083123f93dSJagannathan Raman 
5093123f93dSJagannathan Raman /**
5103123f93dSJagannathan Raman  * vfu_object_register_bars - Identify active BAR regions of pdev and setup
5113123f93dSJagannathan Raman  *                            callbacks to handle read/write accesses
5123123f93dSJagannathan Raman  */
vfu_object_register_bars(vfu_ctx_t * vfu_ctx,PCIDevice * pdev)5133123f93dSJagannathan Raman static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
5143123f93dSJagannathan Raman {
5153123f93dSJagannathan Raman     int flags = VFU_REGION_FLAG_RW;
5163123f93dSJagannathan Raman     int i;
5173123f93dSJagannathan Raman 
5183123f93dSJagannathan Raman     for (i = 0; i < PCI_NUM_REGIONS; i++) {
5193123f93dSJagannathan Raman         if (!pdev->io_regions[i].size) {
5203123f93dSJagannathan Raman             continue;
5213123f93dSJagannathan Raman         }
5223123f93dSJagannathan Raman 
5233123f93dSJagannathan Raman         if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
5243123f93dSJagannathan Raman             pdev->io_regions[i].memory->readonly) {
5253123f93dSJagannathan Raman             flags &= ~VFU_REGION_FLAG_WRITE;
5263123f93dSJagannathan Raman         }
5273123f93dSJagannathan Raman 
5283123f93dSJagannathan Raman         vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
5293123f93dSJagannathan Raman                          (size_t)pdev->io_regions[i].size,
5303123f93dSJagannathan Raman                          vfu_object_bar_handlers[i],
5313123f93dSJagannathan Raman                          flags, NULL, 0, -1, 0);
5323123f93dSJagannathan Raman 
5333123f93dSJagannathan Raman         trace_vfu_bar_register(i, pdev->io_regions[i].addr,
5343123f93dSJagannathan Raman                                pdev->io_regions[i].size);
5353123f93dSJagannathan Raman     }
5363123f93dSJagannathan Raman }
5373123f93dSJagannathan Raman 
/*
 * Map a device interrupt to an IRQ number on the bus. The IRQ number is the
 * device's BDF (bus number combined with devfn); @intx is not used.
 */
static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
{
    int bus_nr = pci_bus_num(pci_get_bus(pci_dev));

    return PCI_BUILD_BDF(bus_nr, pci_dev->devfn);
}
54508cf3dc6SJagannathan Raman 
vfu_object_set_irq(void * opaque,int pirq,int level)54608cf3dc6SJagannathan Raman static void vfu_object_set_irq(void *opaque, int pirq, int level)
54708cf3dc6SJagannathan Raman {
54808cf3dc6SJagannathan Raman     PCIBus *pci_bus = opaque;
54908cf3dc6SJagannathan Raman     PCIDevice *pci_dev = NULL;
55008cf3dc6SJagannathan Raman     vfu_ctx_t *vfu_ctx = NULL;
55108cf3dc6SJagannathan Raman     int pci_bus_num, devfn;
55208cf3dc6SJagannathan Raman 
55308cf3dc6SJagannathan Raman     if (level) {
55408cf3dc6SJagannathan Raman         pci_bus_num = PCI_BUS_NUM(pirq);
55508cf3dc6SJagannathan Raman         devfn = PCI_BDF_TO_DEVFN(pirq);
55608cf3dc6SJagannathan Raman 
55708cf3dc6SJagannathan Raman         /*
55808cf3dc6SJagannathan Raman          * pci_find_device() performs at O(1) if the device is attached
55908cf3dc6SJagannathan Raman          * to the root PCI bus. Whereas, if the device is attached to a
56008cf3dc6SJagannathan Raman          * secondary PCI bus (such as when a root port is involved),
56108cf3dc6SJagannathan Raman          * finding the parent PCI bus could take O(n)
56208cf3dc6SJagannathan Raman          */
56308cf3dc6SJagannathan Raman         pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
56408cf3dc6SJagannathan Raman 
56508cf3dc6SJagannathan Raman         vfu_ctx = pci_dev->irq_opaque;
56608cf3dc6SJagannathan Raman 
56708cf3dc6SJagannathan Raman         g_assert(vfu_ctx);
56808cf3dc6SJagannathan Raman 
56908cf3dc6SJagannathan Raman         vfu_irq_trigger(vfu_ctx, 0);
57008cf3dc6SJagannathan Raman     }
57108cf3dc6SJagannathan Raman }
57208cf3dc6SJagannathan Raman 
vfu_object_msi_prepare_msg(PCIDevice * pci_dev,unsigned int vector)57308cf3dc6SJagannathan Raman static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
57408cf3dc6SJagannathan Raman                                              unsigned int vector)
57508cf3dc6SJagannathan Raman {
57608cf3dc6SJagannathan Raman     MSIMessage msg;
57708cf3dc6SJagannathan Raman 
57808cf3dc6SJagannathan Raman     msg.address = 0;
57908cf3dc6SJagannathan Raman     msg.data = vector;
58008cf3dc6SJagannathan Raman 
58108cf3dc6SJagannathan Raman     return msg;
58208cf3dc6SJagannathan Raman }
58308cf3dc6SJagannathan Raman 
/*
 * Deliver an MSI/MSI-X interrupt: trigger the IRQ whose index is stored in
 * msg.data on the libvfio-user context kept in the device's irq_opaque.
 */
static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
{
    vfu_ctx_t *ctx = pci_dev->irq_opaque;

    vfu_irq_trigger(ctx, msg.data);
}
59008cf3dc6SJagannathan Raman 
/*
 * Save the device's current MSI/MSI-X callbacks in @o and replace them with
 * the vfio-user implementations. vfu_object_restore_msi_cbs() undoes this.
 */
static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    /* remember what the device had installed */
    o->default_msi_trigger = pdev->msi_trigger;
    o->default_msi_prepare_message = pdev->msi_prepare_message;
    o->default_msix_prepare_message = pdev->msix_prepare_message;

    /* MSI and MSI-X share the same message preparation routine */
    pdev->msi_trigger = vfu_object_msi_trigger;
    pdev->msi_prepare_message = vfu_object_msi_prepare_msg;
    pdev->msix_prepare_message = vfu_object_msi_prepare_msg;
}
60108cf3dc6SJagannathan Raman 
/*
 * Put back the MSI/MSI-X callbacks that vfu_object_setup_msi_cbs() saved
 * in @o.
 *
 * Callers invoke this on cleanup paths that can be reached before
 * vfu_object_setup_msi_cbs() has run (for example when context setup fails
 * early). In that case nothing was saved, and blindly copying the saved
 * slots would overwrite the device's own callbacks with NULL. So the
 * function does nothing when all three saved slots are empty.
 * NOTE(review): this assumes the saved slots start out NULL and that a
 * realized PCI device always has at least one of these callbacks set -
 * confirm against the PCI core.
 *
 * The saved slots are cleared afterwards so that a later call cannot apply
 * stale values.
 */
static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    if (!o->default_msi_trigger && !o->default_msi_prepare_message &&
        !o->default_msix_prepare_message) {
        return;
    }

    pdev->msi_trigger = o->default_msi_trigger;
    pdev->msi_prepare_message = o->default_msi_prepare_message;
    pdev->msix_prepare_message = o->default_msix_prepare_message;

    o->default_msi_trigger = NULL;
    o->default_msi_prepare_message = NULL;
    o->default_msix_prepare_message = NULL;
}
60808cf3dc6SJagannathan Raman 
/*
 * IRQ state callback for MSI-X: mask or unmask @count vectors beginning at
 * @start on the PCI device attached to the context.
 *
 * @vfu_ctx: libvfio-user context; its private pointer is the VfuObject
 * @start: first vector affected
 * @count: number of vectors affected
 * @mask: true to mask, false to unmask
 */
static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                               uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t i;

    /*
     * @count is a length, not an end index: the affected range is
     * [start, start + count). Iterating over the offset avoids computing
     * start + count, which could wrap.
     */
    for (i = 0; i < count; i++) {
        msix_set_mask(o->pci_dev, start + i, mask);
    }
}
61908cf3dc6SJagannathan Raman 
/*
 * IRQ state callback for MSI: mask or unmask @count vectors beginning at
 * @start on the PCI device attached to the context. A failure to change
 * one vector is reported through VFU_OBJECT_ERROR() and the remaining
 * vectors are still processed.
 *
 * @vfu_ctx: libvfio-user context; its private pointer is the VfuObject
 * @start: first vector affected
 * @count: number of vectors affected
 * @mask: true to mask, false to unmask
 */
static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                              uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    Error *err = NULL;
    uint32_t i;

    /*
     * @count is a length, not an end index: the affected range is
     * [start, start + count). Iterating over the offset avoids computing
     * start + count, which could wrap.
     */
    for (i = 0; i < count; i++) {
        msi_set_mask(o->pci_dev, start + i, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
            err = NULL;
        }
    }
}
63708cf3dc6SJagannathan Raman 
/*
 * Describe the device's interrupts to libvfio-user and hook the device's
 * interrupt delivery up to the context.
 *
 * One INTx interrupt is always registered. If the device has MSI-X vectors
 * allocated they are registered together with the MSI-X mask/unmask
 * callback; otherwise, if it has MSI vectors allocated, those are
 * registered with the MSI callback. On success the device's MSI callbacks
 * are replaced and its irq_opaque points at the context.
 *
 * Returns 0 on success, or the negative value returned by the failing
 * libvfio-user call. Nothing on the device is modified on failure.
 */
static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            return ret;
        }

        /* the callback registration can fail too; do not ignore it */
        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                           &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            return ret;
        }

        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                           &vfu_msi_irq_state);
    }

    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}
67008cf3dc6SJagannathan Raman 
/*
 * Install the vfio-user IRQ handlers on @pci_bus: vfu_object_set_irq()
 * receives the interrupts (with the bus itself as opaque) and
 * vfu_object_map_irq() maps a device to its IRQ number, which is its BDF.
 */
void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
    int bus_num = pci_bus_num(pci_bus);
    int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);

    /*
     * IRQ numbers are BDFs in the range [0, max_bdf], so the number of
     * IRQs is max_bdf + 1. Passing max_bdf itself would leave the IRQ of
     * the device at the last devfn out of range.
     * NOTE(review): relies on pci_bus_irqs() taking a count as its last
     * argument - confirm against the PCI core.
     */
    pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf + 1);
    pci_bus_map_irqs(pci_bus, vfu_object_map_irq);
}
67908cf3dc6SJagannathan Raman 
/*
 * Reset callback registered with libvfio-user. Cold-resets the attached
 * PCI device for every reset type except VFU_RESET_LOST_CONN. Always
 * returns 0.
 */
static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
{
    VfuObject *o = vfu_get_private(vfu_ctx);

    /* vfu_object_ctx_run() handles lost connection */
    if (type != VFU_RESET_LOST_CONN) {
        device_cold_reset(DEVICE(o->pci_dev));
    }

    return 0;
}
69378e27dfaSJagannathan Raman 
69487f7249fSJagannathan Raman /*
69587f7249fSJagannathan Raman  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
69687f7249fSJagannathan Raman  * properties. It also depends on devices instantiated in QEMU. These
69787f7249fSJagannathan Raman  * dependencies are not available during the instance_init phase of this
69887f7249fSJagannathan Raman  * object's life-cycle. As such, the server is initialized after the
69987f7249fSJagannathan Raman  * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
70087f7249fSJagannathan Raman  * when the machine is setup, and the dependencies are available.
70187f7249fSJagannathan Raman  */
vfu_object_machine_done(Notifier * notifier,void * data)70287f7249fSJagannathan Raman static void vfu_object_machine_done(Notifier *notifier, void *data)
70387f7249fSJagannathan Raman {
70487f7249fSJagannathan Raman     VfuObject *o = container_of(notifier, VfuObject, machine_done);
70587f7249fSJagannathan Raman     Error *err = NULL;
70687f7249fSJagannathan Raman 
70787f7249fSJagannathan Raman     vfu_object_init_ctx(o, &err);
70887f7249fSJagannathan Raman 
70987f7249fSJagannathan Raman     if (err) {
71087f7249fSJagannathan Raman         error_propagate(&error_abort, err);
71187f7249fSJagannathan Raman     }
71287f7249fSJagannathan Raman }
71387f7249fSJagannathan Raman 
7149fb3fba1SJagannathan Raman /**
7159fb3fba1SJagannathan Raman  * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
7169fb3fba1SJagannathan Raman  *     an unplug blocker for the associated PCI device. Setup a FD handler
7179fb3fba1SJagannathan Raman  *     to process incoming messages in the context's socket.
7189fb3fba1SJagannathan Raman  *
7199fb3fba1SJagannathan Raman  *     The socket and device properties are mandatory, and this function
7209fb3fba1SJagannathan Raman  *     will not create the context without them - the setters for these
7219fb3fba1SJagannathan Raman  *     properties should call this function when the property is set. The
7229fb3fba1SJagannathan Raman  *     machine should also be ready when this function is invoked - it is
7239fb3fba1SJagannathan Raman  *     because QEMU objects are initialized before devices, and the
7249fb3fba1SJagannathan Raman  *     associated PCI device wouldn't be available at the object
7259fb3fba1SJagannathan Raman  *     initialization time. Until these conditions are satisfied, this
7269fb3fba1SJagannathan Raman  *     function would return early without performing any task.
7279fb3fba1SJagannathan Raman  */
vfu_object_init_ctx(VfuObject * o,Error ** errp)72887f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp)
72987f7249fSJagannathan Raman {
730a6e8d6d9SJagannathan Raman     DeviceState *dev = NULL;
731a6e8d6d9SJagannathan Raman     vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
732a6e8d6d9SJagannathan Raman     int ret;
73387f7249fSJagannathan Raman 
73487f7249fSJagannathan Raman     if (o->vfu_ctx || !o->socket || !o->device ||
73587f7249fSJagannathan Raman             !phase_check(PHASE_MACHINE_READY)) {
73687f7249fSJagannathan Raman         return;
73787f7249fSJagannathan Raman     }
73887f7249fSJagannathan Raman 
73987f7249fSJagannathan Raman     if (o->err) {
74087f7249fSJagannathan Raman         error_propagate(errp, o->err);
74187f7249fSJagannathan Raman         o->err = NULL;
74287f7249fSJagannathan Raman         return;
74387f7249fSJagannathan Raman     }
74487f7249fSJagannathan Raman 
7459fb3fba1SJagannathan Raman     o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
7469fb3fba1SJagannathan Raman                                 LIBVFIO_USER_FLAG_ATTACH_NB,
74787f7249fSJagannathan Raman                                 o, VFU_DEV_TYPE_PCI);
74887f7249fSJagannathan Raman     if (o->vfu_ctx == NULL) {
74987f7249fSJagannathan Raman         error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
75087f7249fSJagannathan Raman         return;
75187f7249fSJagannathan Raman     }
752a6e8d6d9SJagannathan Raman 
753a6e8d6d9SJagannathan Raman     dev = qdev_find_recursive(sysbus_get_default(), o->device);
754a6e8d6d9SJagannathan Raman     if (dev == NULL) {
755a6e8d6d9SJagannathan Raman         error_setg(errp, "vfu: Device %s not found", o->device);
756a6e8d6d9SJagannathan Raman         goto fail;
757a6e8d6d9SJagannathan Raman     }
758a6e8d6d9SJagannathan Raman 
759a6e8d6d9SJagannathan Raman     if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
760a6e8d6d9SJagannathan Raman         error_setg(errp, "vfu: %s not a PCI device", o->device);
761a6e8d6d9SJagannathan Raman         goto fail;
762a6e8d6d9SJagannathan Raman     }
763a6e8d6d9SJagannathan Raman 
764a6e8d6d9SJagannathan Raman     o->pci_dev = PCI_DEVICE(dev);
765a6e8d6d9SJagannathan Raman 
766a6e8d6d9SJagannathan Raman     object_ref(OBJECT(o->pci_dev));
767a6e8d6d9SJagannathan Raman 
768a6e8d6d9SJagannathan Raman     if (pci_is_express(o->pci_dev)) {
769a6e8d6d9SJagannathan Raman         pci_type = VFU_PCI_TYPE_EXPRESS;
770a6e8d6d9SJagannathan Raman     }
771a6e8d6d9SJagannathan Raman 
772a6e8d6d9SJagannathan Raman     ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
773a6e8d6d9SJagannathan Raman     if (ret < 0) {
774a6e8d6d9SJagannathan Raman         error_setg(errp,
775a6e8d6d9SJagannathan Raman                    "vfu: Failed to attach PCI device %s to context - %s",
776a6e8d6d9SJagannathan Raman                    o->device, strerror(errno));
777a6e8d6d9SJagannathan Raman         goto fail;
778a6e8d6d9SJagannathan Raman     }
779a6e8d6d9SJagannathan Raman 
780a6e8d6d9SJagannathan Raman     error_setg(&o->unplug_blocker,
781a6e8d6d9SJagannathan Raman                "vfu: %s for %s must be deleted before unplugging",
782a6e8d6d9SJagannathan Raman                TYPE_VFU_OBJECT, o->device);
783a6e8d6d9SJagannathan Raman     qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
784a6e8d6d9SJagannathan Raman 
78590072f29SJagannathan Raman     ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
78690072f29SJagannathan Raman                            pci_config_size(o->pci_dev), &vfu_object_cfg_access,
78790072f29SJagannathan Raman                            VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
78890072f29SJagannathan Raman                            NULL, 0, -1, 0);
78990072f29SJagannathan Raman     if (ret < 0) {
79090072f29SJagannathan Raman         error_setg(errp,
79190072f29SJagannathan Raman                    "vfu: Failed to setup config space handlers for %s- %s",
79290072f29SJagannathan Raman                    o->device, strerror(errno));
79390072f29SJagannathan Raman         goto fail;
79490072f29SJagannathan Raman     }
79590072f29SJagannathan Raman 
79615ccf9beSJagannathan Raman     ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
79715ccf9beSJagannathan Raman     if (ret < 0) {
79815ccf9beSJagannathan Raman         error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
79915ccf9beSJagannathan Raman                    o->device);
80015ccf9beSJagannathan Raman         goto fail;
80115ccf9beSJagannathan Raman     }
80215ccf9beSJagannathan Raman 
8033123f93dSJagannathan Raman     vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
8043123f93dSJagannathan Raman 
80508cf3dc6SJagannathan Raman     ret = vfu_object_setup_irqs(o, o->pci_dev);
80608cf3dc6SJagannathan Raman     if (ret < 0) {
80708cf3dc6SJagannathan Raman         error_setg(errp, "vfu: Failed to setup interrupts for %s",
80808cf3dc6SJagannathan Raman                    o->device);
80908cf3dc6SJagannathan Raman         goto fail;
81008cf3dc6SJagannathan Raman     }
81108cf3dc6SJagannathan Raman 
81278e27dfaSJagannathan Raman     ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
81378e27dfaSJagannathan Raman     if (ret < 0) {
81478e27dfaSJagannathan Raman         error_setg(errp, "vfu: Failed to setup reset callback");
81578e27dfaSJagannathan Raman         goto fail;
81678e27dfaSJagannathan Raman     }
81778e27dfaSJagannathan Raman 
8189fb3fba1SJagannathan Raman     ret = vfu_realize_ctx(o->vfu_ctx);
8199fb3fba1SJagannathan Raman     if (ret < 0) {
8209fb3fba1SJagannathan Raman         error_setg(errp, "vfu: Failed to realize device %s- %s",
8219fb3fba1SJagannathan Raman                    o->device, strerror(errno));
8229fb3fba1SJagannathan Raman         goto fail;
8239fb3fba1SJagannathan Raman     }
8249fb3fba1SJagannathan Raman 
8259fb3fba1SJagannathan Raman     o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
8269fb3fba1SJagannathan Raman     if (o->vfu_poll_fd < 0) {
8279fb3fba1SJagannathan Raman         error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
8289fb3fba1SJagannathan Raman         goto fail;
8299fb3fba1SJagannathan Raman     }
8309fb3fba1SJagannathan Raman 
8319fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
8329fb3fba1SJagannathan Raman 
833a6e8d6d9SJagannathan Raman     return;
834a6e8d6d9SJagannathan Raman 
835a6e8d6d9SJagannathan Raman fail:
836a6e8d6d9SJagannathan Raman     vfu_destroy_ctx(o->vfu_ctx);
837a6e8d6d9SJagannathan Raman     if (o->unplug_blocker && o->pci_dev) {
838a6e8d6d9SJagannathan Raman         qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
839a6e8d6d9SJagannathan Raman         error_free(o->unplug_blocker);
840a6e8d6d9SJagannathan Raman         o->unplug_blocker = NULL;
841a6e8d6d9SJagannathan Raman     }
842a6e8d6d9SJagannathan Raman     if (o->pci_dev) {
84308cf3dc6SJagannathan Raman         vfu_object_restore_msi_cbs(o);
84408cf3dc6SJagannathan Raman         o->pci_dev->irq_opaque = NULL;
845a6e8d6d9SJagannathan Raman         object_unref(OBJECT(o->pci_dev));
846a6e8d6d9SJagannathan Raman         o->pci_dev = NULL;
847a6e8d6d9SJagannathan Raman     }
848a6e8d6d9SJagannathan Raman     o->vfu_ctx = NULL;
8498f9a9259SJagannathan Raman }
8508f9a9259SJagannathan Raman 
/*
 * Instance initializer for TYPE_VFU_OBJECT.
 *
 * Counts the new instance in the class-wide nr_devs. If the current machine
 * is not TYPE_REMOTE_MACHINE, an error is recorded in o->err and no further
 * setup is done. Otherwise, when the machine is not ready yet, a
 * machine-init-done notifier is registered so that the server is
 * initialized once the machine is set up.
 */
static void vfu_object_init(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs++;

    /*
     * Mark the poll FD as unset before anything can return early. The
     * finalizer treats every value other than -1 as a registered FD, so
     * leaving the field at 0 on the error path below would make it tear
     * down the handler of file descriptor 0.
     */
    o->vfu_poll_fd = -1;

    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
        error_setg(&o->err, "vfu: %s only compatible with %s machine",
                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
        return;
    }

    if (!phase_check(PHASE_MACHINE_READY)) {
        o->machine_done.notify = vfu_object_machine_done;
        qemu_add_machine_init_done_notifier(&o->machine_done);
    }
}
8718f9a9259SJagannathan Raman 
/*
 * Instance finalizer for TYPE_VFU_OBJECT.
 *
 * Releases, in order: the socket address, the poll FD handler, the
 * libvfio-user context, the device name, the unplug blocker and the
 * reference on the PCI device (restoring its MSI callbacks first). When the
 * last instance goes away and auto-shutdown is enabled, a shutdown of the
 * process is requested. Finally the machine-init-done notifier is removed
 * if one was registered.
 */
static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *klass = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    klass->nr_devs--;

    qapi_free_SocketAddress(o->socket);
    o->socket = NULL;

    /* stop watching the context's FD before the context is destroyed */
    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);
    o->device = NULL;

    if (o->pci_dev) {
        if (o->unplug_blocker) {
            qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
            error_free(o->unplug_blocker);
            o->unplug_blocker = NULL;
        }

        /* detach the device from this object and drop our reference */
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    if (klass->nr_devs == 0 && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}
9198f9a9259SJagannathan Raman 
/*
 * Class initializer for TYPE_VFU_OBJECT: resets the instance counter and
 * registers the write-only 'socket' and 'device' properties along with
 * their descriptions.
 */
static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *vfu_class = VFU_OBJECT_CLASS(klass);

    vfu_class->nr_devs = 0;

    /* both properties have a setter only */
    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfu_object_set_socket, NULL, NULL);
    object_class_property_add_str(klass, "device", NULL,
                                  vfu_object_set_device);

    object_class_property_set_description(klass, "socket",
                                          "SocketAddress "
                                          "(ex: type=unix,path=/tmp/sock). "
                                          "Only UNIX is presently supported");
    object_class_property_set_description(klass, "device",
                                          "device ID - only PCI devices "
                                          "are presently supported");
}
9388f9a9259SJagannathan Raman 
9398f9a9259SJagannathan Raman static const TypeInfo vfu_object_info = {
9408f9a9259SJagannathan Raman     .name = TYPE_VFU_OBJECT,
9418f9a9259SJagannathan Raman     .parent = TYPE_OBJECT,
9428f9a9259SJagannathan Raman     .instance_size = sizeof(VfuObject),
9438f9a9259SJagannathan Raman     .instance_init = vfu_object_init,
9448f9a9259SJagannathan Raman     .instance_finalize = vfu_object_finalize,
9458f9a9259SJagannathan Raman     .class_size = sizeof(VfuObjectClass),
9468f9a9259SJagannathan Raman     .class_init = vfu_object_class_init,
9478f9a9259SJagannathan Raman     .interfaces = (InterfaceInfo[]) {
9488f9a9259SJagannathan Raman         { TYPE_USER_CREATABLE },
9498f9a9259SJagannathan Raman         { }
9508f9a9259SJagannathan Raman     }
9518f9a9259SJagannathan Raman };
9528f9a9259SJagannathan Raman 
vfu_register_types(void)9538f9a9259SJagannathan Raman static void vfu_register_types(void)
9548f9a9259SJagannathan Raman {
9558f9a9259SJagannathan Raman     type_register_static(&vfu_object_info);
9568f9a9259SJagannathan Raman }
9578f9a9259SJagannathan Raman 
9588f9a9259SJagannathan Raman type_init(vfu_register_types);
959