/**
 * QEMU vfio-user-server server object
 *
 * Copyright © 2022 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
 *
 * See the COPYING file in the top-level directory.
 *
 */
118f9a9259SJagannathan Raman
/**
 * Usage: add options:
 *     -machine x-remote,vfio-user=on,auto-shutdown=on
 *     -device <PCI-device>,id=<pci-dev-id>
 *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
 *             device=<pci-dev-id>
 *
 * Note that x-vfio-user-server object must be used with x-remote machine only.
 * This server could only support PCI devices for now.
 *
 * type - SocketAddress type - presently "unix" alone is supported. Required
 *        option
 *
 * path - named unix socket, it will be created by the server. It is
 *        a required option
 *
 * device - id of a device on the server, a required option. PCI devices
 *          alone are supported presently.
 *
 * notes - x-vfio-user-server could block IO and monitor during the
 *         initialization phase.
 *
 *         When x-remote machine has the auto-shutdown property
 *         enabled (default), x-vfio-user-server terminates after the last
 *         client disconnects. Otherwise, it will continue running until
 *         explicitly killed.
 */
398f9a9259SJagannathan Raman
408f9a9259SJagannathan Raman #include "qemu/osdep.h"
418f9a9259SJagannathan Raman
428f9a9259SJagannathan Raman #include "qom/object.h"
438f9a9259SJagannathan Raman #include "qom/object_interfaces.h"
448f9a9259SJagannathan Raman #include "qemu/error-report.h"
458f9a9259SJagannathan Raman #include "trace.h"
468f9a9259SJagannathan Raman #include "sysemu/runstate.h"
478f9a9259SJagannathan Raman #include "hw/boards.h"
488f9a9259SJagannathan Raman #include "hw/remote/machine.h"
498f9a9259SJagannathan Raman #include "qapi/error.h"
508f9a9259SJagannathan Raman #include "qapi/qapi-visit-sockets.h"
519fb3fba1SJagannathan Raman #include "qapi/qapi-events-misc.h"
5287f7249fSJagannathan Raman #include "qemu/notify.h"
539fb3fba1SJagannathan Raman #include "qemu/thread.h"
5490072f29SJagannathan Raman #include "qemu/main-loop.h"
5587f7249fSJagannathan Raman #include "sysemu/sysemu.h"
5687f7249fSJagannathan Raman #include "libvfio-user.h"
57a6e8d6d9SJagannathan Raman #include "hw/qdev-core.h"
58a6e8d6d9SJagannathan Raman #include "hw/pci/pci.h"
599fb3fba1SJagannathan Raman #include "qemu/timer.h"
603123f93dSJagannathan Raman #include "exec/memory.h"
6108cf3dc6SJagannathan Raman #include "hw/pci/msi.h"
6208cf3dc6SJagannathan Raman #include "hw/pci/msix.h"
6308cf3dc6SJagannathan Raman #include "hw/remote/vfio-user-obj.h"
648f9a9259SJagannathan Raman
658f9a9259SJagannathan Raman #define TYPE_VFU_OBJECT "x-vfio-user-server"
668f9a9259SJagannathan Raman OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
678f9a9259SJagannathan Raman
/**
 * VFU_OBJECT_ERROR - reports an error message.
 *
 * If auto_shutdown is set, it aborts the machine on error. Otherwise,
 * it logs an error message without aborting. auto_shutdown is disabled
 * when the server serves clients from multiple VMs; as such, an error
 * from one VM shouldn't be able to disrupt other VM's services.
 *
 * @o:   the VfuObject reporting the error (currently not used by the
 *       expansion, kept for the callers' benefit)
 * @fmt: printf-style format string, followed by its arguments
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: "VFU_OBJECT_ERROR(...);" is then safe in any
 * statement position, including an unbraced if/else.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    do {                                                                  \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    } while (0)
848f9a9259SJagannathan Raman
858f9a9259SJagannathan Raman struct VfuObjectClass {
868f9a9259SJagannathan Raman ObjectClass parent_class;
878f9a9259SJagannathan Raman
888f9a9259SJagannathan Raman unsigned int nr_devs;
898f9a9259SJagannathan Raman };
908f9a9259SJagannathan Raman
918f9a9259SJagannathan Raman struct VfuObject {
928f9a9259SJagannathan Raman /* private */
938f9a9259SJagannathan Raman Object parent;
948f9a9259SJagannathan Raman
958f9a9259SJagannathan Raman SocketAddress *socket;
968f9a9259SJagannathan Raman
978f9a9259SJagannathan Raman char *device;
988f9a9259SJagannathan Raman
998f9a9259SJagannathan Raman Error *err;
10087f7249fSJagannathan Raman
10187f7249fSJagannathan Raman Notifier machine_done;
10287f7249fSJagannathan Raman
10387f7249fSJagannathan Raman vfu_ctx_t *vfu_ctx;
104a6e8d6d9SJagannathan Raman
105a6e8d6d9SJagannathan Raman PCIDevice *pci_dev;
106a6e8d6d9SJagannathan Raman
107a6e8d6d9SJagannathan Raman Error *unplug_blocker;
1089fb3fba1SJagannathan Raman
1099fb3fba1SJagannathan Raman int vfu_poll_fd;
11008cf3dc6SJagannathan Raman
11108cf3dc6SJagannathan Raman MSITriggerFunc *default_msi_trigger;
11208cf3dc6SJagannathan Raman MSIPrepareMessageFunc *default_msi_prepare_message;
11308cf3dc6SJagannathan Raman MSIxPrepareMessageFunc *default_msix_prepare_message;
1148f9a9259SJagannathan Raman };
1158f9a9259SJagannathan Raman
11687f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp);
11787f7249fSJagannathan Raman
vfu_object_auto_shutdown(void)1188f9a9259SJagannathan Raman static bool vfu_object_auto_shutdown(void)
1198f9a9259SJagannathan Raman {
1208f9a9259SJagannathan Raman bool auto_shutdown = true;
1218f9a9259SJagannathan Raman Error *local_err = NULL;
1228f9a9259SJagannathan Raman
1238f9a9259SJagannathan Raman if (!current_machine) {
1248f9a9259SJagannathan Raman return auto_shutdown;
1258f9a9259SJagannathan Raman }
1268f9a9259SJagannathan Raman
1278f9a9259SJagannathan Raman auto_shutdown = object_property_get_bool(OBJECT(current_machine),
1288f9a9259SJagannathan Raman "auto-shutdown",
1298f9a9259SJagannathan Raman &local_err);
1308f9a9259SJagannathan Raman
1318f9a9259SJagannathan Raman /*
1328f9a9259SJagannathan Raman * local_err would be set if no such property exists - safe to ignore.
1338f9a9259SJagannathan Raman * Unlikely scenario as auto-shutdown is always defined for
1348f9a9259SJagannathan Raman * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
1358f9a9259SJagannathan Raman * TYPE_REMOTE_MACHINE
1368f9a9259SJagannathan Raman */
1378f9a9259SJagannathan Raman if (local_err) {
1388f9a9259SJagannathan Raman auto_shutdown = true;
1398f9a9259SJagannathan Raman error_free(local_err);
1408f9a9259SJagannathan Raman }
1418f9a9259SJagannathan Raman
1428f9a9259SJagannathan Raman return auto_shutdown;
1438f9a9259SJagannathan Raman }
1448f9a9259SJagannathan Raman
vfu_object_set_socket(Object * obj,Visitor * v,const char * name,void * opaque,Error ** errp)1458f9a9259SJagannathan Raman static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
1468f9a9259SJagannathan Raman void *opaque, Error **errp)
1478f9a9259SJagannathan Raman {
1488f9a9259SJagannathan Raman VfuObject *o = VFU_OBJECT(obj);
1498f9a9259SJagannathan Raman
15087f7249fSJagannathan Raman if (o->vfu_ctx) {
15187f7249fSJagannathan Raman error_setg(errp, "vfu: Unable to set socket property - server busy");
15287f7249fSJagannathan Raman return;
15387f7249fSJagannathan Raman }
15487f7249fSJagannathan Raman
1558f9a9259SJagannathan Raman qapi_free_SocketAddress(o->socket);
1568f9a9259SJagannathan Raman
1578f9a9259SJagannathan Raman o->socket = NULL;
1588f9a9259SJagannathan Raman
1598f9a9259SJagannathan Raman visit_type_SocketAddress(v, name, &o->socket, errp);
1608f9a9259SJagannathan Raman
1618f9a9259SJagannathan Raman if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
1628f9a9259SJagannathan Raman error_setg(errp, "vfu: Unsupported socket type - %s",
1638f9a9259SJagannathan Raman SocketAddressType_str(o->socket->type));
1648f9a9259SJagannathan Raman qapi_free_SocketAddress(o->socket);
1658f9a9259SJagannathan Raman o->socket = NULL;
1668f9a9259SJagannathan Raman return;
1678f9a9259SJagannathan Raman }
1688f9a9259SJagannathan Raman
1698f9a9259SJagannathan Raman trace_vfu_prop("socket", o->socket->u.q_unix.path);
17087f7249fSJagannathan Raman
17187f7249fSJagannathan Raman vfu_object_init_ctx(o, errp);
1728f9a9259SJagannathan Raman }
1738f9a9259SJagannathan Raman
vfu_object_set_device(Object * obj,const char * str,Error ** errp)1748f9a9259SJagannathan Raman static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
1758f9a9259SJagannathan Raman {
1768f9a9259SJagannathan Raman VfuObject *o = VFU_OBJECT(obj);
1778f9a9259SJagannathan Raman
17887f7249fSJagannathan Raman if (o->vfu_ctx) {
17987f7249fSJagannathan Raman error_setg(errp, "vfu: Unable to set device property - server busy");
18087f7249fSJagannathan Raman return;
18187f7249fSJagannathan Raman }
18287f7249fSJagannathan Raman
1838f9a9259SJagannathan Raman g_free(o->device);
1848f9a9259SJagannathan Raman
1858f9a9259SJagannathan Raman o->device = g_strdup(str);
1868f9a9259SJagannathan Raman
1878f9a9259SJagannathan Raman trace_vfu_prop("device", str);
18887f7249fSJagannathan Raman
18987f7249fSJagannathan Raman vfu_object_init_ctx(o, errp);
19087f7249fSJagannathan Raman }
19187f7249fSJagannathan Raman
vfu_object_ctx_run(void * opaque)1929fb3fba1SJagannathan Raman static void vfu_object_ctx_run(void *opaque)
1939fb3fba1SJagannathan Raman {
1949fb3fba1SJagannathan Raman VfuObject *o = opaque;
1959fb3fba1SJagannathan Raman const char *vfu_id;
1969fb3fba1SJagannathan Raman char *vfu_path, *pci_dev_path;
1979fb3fba1SJagannathan Raman int ret = -1;
1989fb3fba1SJagannathan Raman
1999fb3fba1SJagannathan Raman while (ret != 0) {
2009fb3fba1SJagannathan Raman ret = vfu_run_ctx(o->vfu_ctx);
2019fb3fba1SJagannathan Raman if (ret < 0) {
2029fb3fba1SJagannathan Raman if (errno == EINTR) {
2039fb3fba1SJagannathan Raman continue;
2049fb3fba1SJagannathan Raman } else if (errno == ENOTCONN) {
2059fb3fba1SJagannathan Raman vfu_id = object_get_canonical_path_component(OBJECT(o));
2069fb3fba1SJagannathan Raman vfu_path = object_get_canonical_path(OBJECT(o));
2079fb3fba1SJagannathan Raman g_assert(o->pci_dev);
2089fb3fba1SJagannathan Raman pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
2099fb3fba1SJagannathan Raman /* o->device is a required property and is non-NULL here */
2109fb3fba1SJagannathan Raman g_assert(o->device);
2119fb3fba1SJagannathan Raman qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
2129fb3fba1SJagannathan Raman o->device, pci_dev_path);
2139fb3fba1SJagannathan Raman qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2149fb3fba1SJagannathan Raman o->vfu_poll_fd = -1;
2159fb3fba1SJagannathan Raman object_unparent(OBJECT(o));
2169fb3fba1SJagannathan Raman g_free(vfu_path);
2179fb3fba1SJagannathan Raman g_free(pci_dev_path);
2189fb3fba1SJagannathan Raman break;
2199fb3fba1SJagannathan Raman } else {
2209fb3fba1SJagannathan Raman VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
2219fb3fba1SJagannathan Raman o->device, strerror(errno));
2229fb3fba1SJagannathan Raman break;
2239fb3fba1SJagannathan Raman }
2249fb3fba1SJagannathan Raman }
2259fb3fba1SJagannathan Raman }
2269fb3fba1SJagannathan Raman }
2279fb3fba1SJagannathan Raman
vfu_object_attach_ctx(void * opaque)2289fb3fba1SJagannathan Raman static void vfu_object_attach_ctx(void *opaque)
2299fb3fba1SJagannathan Raman {
2309fb3fba1SJagannathan Raman VfuObject *o = opaque;
2319fb3fba1SJagannathan Raman GPollFD pfds[1];
2329fb3fba1SJagannathan Raman int ret;
2339fb3fba1SJagannathan Raman
2349fb3fba1SJagannathan Raman qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2359fb3fba1SJagannathan Raman
2369fb3fba1SJagannathan Raman pfds[0].fd = o->vfu_poll_fd;
2379fb3fba1SJagannathan Raman pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
2389fb3fba1SJagannathan Raman
2399fb3fba1SJagannathan Raman retry_attach:
2409fb3fba1SJagannathan Raman ret = vfu_attach_ctx(o->vfu_ctx);
2419fb3fba1SJagannathan Raman if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
2429fb3fba1SJagannathan Raman /**
2439fb3fba1SJagannathan Raman * vfu_object_attach_ctx can block QEMU's main loop
2449fb3fba1SJagannathan Raman * during attach - the monitor and other IO
2459fb3fba1SJagannathan Raman * could be unresponsive during this time.
2469fb3fba1SJagannathan Raman */
2479fb3fba1SJagannathan Raman (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
2489fb3fba1SJagannathan Raman goto retry_attach;
2499fb3fba1SJagannathan Raman } else if (ret < 0) {
2509fb3fba1SJagannathan Raman VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
2519fb3fba1SJagannathan Raman o->device, strerror(errno));
2529fb3fba1SJagannathan Raman return;
2539fb3fba1SJagannathan Raman }
2549fb3fba1SJagannathan Raman
2559fb3fba1SJagannathan Raman o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
2569fb3fba1SJagannathan Raman if (o->vfu_poll_fd < 0) {
2579fb3fba1SJagannathan Raman VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
2589fb3fba1SJagannathan Raman return;
2599fb3fba1SJagannathan Raman }
2609fb3fba1SJagannathan Raman
2619fb3fba1SJagannathan Raman qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
2629fb3fba1SJagannathan Raman }
2639fb3fba1SJagannathan Raman
vfu_object_cfg_access(vfu_ctx_t * vfu_ctx,char * const buf,size_t count,loff_t offset,const bool is_write)26490072f29SJagannathan Raman static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
26590072f29SJagannathan Raman size_t count, loff_t offset,
26690072f29SJagannathan Raman const bool is_write)
26790072f29SJagannathan Raman {
26890072f29SJagannathan Raman VfuObject *o = vfu_get_private(vfu_ctx);
26990072f29SJagannathan Raman uint32_t pci_access_width = sizeof(uint32_t);
27090072f29SJagannathan Raman size_t bytes = count;
27190072f29SJagannathan Raman uint32_t val = 0;
27290072f29SJagannathan Raman char *ptr = buf;
27390072f29SJagannathan Raman int len;
27490072f29SJagannathan Raman
27590072f29SJagannathan Raman /*
27690072f29SJagannathan Raman * Writes to the BAR registers would trigger an update to the
27790072f29SJagannathan Raman * global Memory and IO AddressSpaces. But the remote device
27890072f29SJagannathan Raman * never uses the global AddressSpaces, therefore overlapping
27990072f29SJagannathan Raman * memory regions are not a problem
28090072f29SJagannathan Raman */
28190072f29SJagannathan Raman while (bytes > 0) {
28290072f29SJagannathan Raman len = (bytes > pci_access_width) ? pci_access_width : bytes;
28390072f29SJagannathan Raman if (is_write) {
284*e6578f1fSMattias Nissler val = ldn_le_p(ptr, len);
28590072f29SJagannathan Raman pci_host_config_write_common(o->pci_dev, offset,
28690072f29SJagannathan Raman pci_config_size(o->pci_dev),
28790072f29SJagannathan Raman val, len);
28890072f29SJagannathan Raman trace_vfu_cfg_write(offset, val);
28990072f29SJagannathan Raman } else {
29090072f29SJagannathan Raman val = pci_host_config_read_common(o->pci_dev, offset,
29190072f29SJagannathan Raman pci_config_size(o->pci_dev), len);
292*e6578f1fSMattias Nissler stn_le_p(ptr, len, val);
29390072f29SJagannathan Raman trace_vfu_cfg_read(offset, val);
29490072f29SJagannathan Raman }
29590072f29SJagannathan Raman offset += len;
29690072f29SJagannathan Raman ptr += len;
29790072f29SJagannathan Raman bytes -= len;
29890072f29SJagannathan Raman }
29990072f29SJagannathan Raman
30090072f29SJagannathan Raman return count;
30190072f29SJagannathan Raman }
30290072f29SJagannathan Raman
/**
 * dma_register - map a client DMA region into the device's DMA address space.
 *
 * @vfu_ctx: context whose private pointer is the VfuObject
 * @info:    region description; info->vaddr is the local mapping and
 *           info->iova gives its address and length
 *
 * Regions with a NULL vaddr are ignored. Otherwise a RAM MemoryRegion
 * backed by vaddr is created and added as a subregion of the root of the
 * address space returned by pci_device_iommu_address_space().
 */
static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    struct iovec *iova = &info->iova;
    g_autofree char *mr_name = NULL;
    MemoryRegion *ram;
    AddressSpace *as;

    if (!info->vaddr) {
        return;
    }

    mr_name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                              (uint64_t)info->vaddr);

    ram = g_new0(MemoryRegion, 1);
    memory_region_init_ram_ptr(ram, NULL, mr_name, iova->iov_len,
                               info->vaddr);

    as = pci_device_iommu_address_space(o->pci_dev);
    memory_region_add_subregion(as->root, (hwaddr)iova->iov_base, ram);

    trace_vfu_dma_register((uint64_t)iova->iov_base, iova->iov_len);
}
32915ccf9beSJagannathan Raman
/**
 * dma_unregister - remove a client DMA region from the DMA address space.
 *
 * @vfu_ctx: context whose private pointer is the VfuObject
 * @info:    region description; info->vaddr identifies the MemoryRegion
 *
 * Does nothing if no MemoryRegion is found for info->vaddr. Otherwise
 * the region is removed from the root of the device's DMA address space
 * and unparented.
 */
static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    ram_addr_t unused_offset;
    MemoryRegion *ram;
    AddressSpace *as;

    ram = memory_region_from_host(info->vaddr, &unused_offset);
    if (ram == NULL) {
        return;
    }

    as = pci_device_iommu_address_space(o->pci_dev);
    memory_region_del_subregion(as->root, ram);
    object_unparent(OBJECT(ram));

    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
}
35015ccf9beSJagannathan Raman
/**
 * vfu_object_mr_rw - transfer @size bytes between @buf and a MemoryRegion.
 *
 * @mr:       region to access
 * @buf:      source (write) or destination (read) buffer
 * @offset:   offset within @mr
 * @size:     number of bytes to transfer
 * @is_write: true to write to @mr, false to read from it
 *
 * Directly accessible regions are copied with memcpy(); everything else
 * is dispatched access by access through the region's read/write path.
 *
 * Returns 0 on success, -1 as soon as a dispatched access fails.
 */
static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *cursor = buf;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() would already check
         * for buffer overflow, we don't need to repeat it here.
         */
        uint8_t *ram = memory_region_get_ram_ptr(mr);
        uint8_t *src = is_write ? buf : ram + offset;
        uint8_t *dst = is_write ? ram + offset : buf;

        memcpy(dst, src, size);

        return 0;
    }

    while (size) {
        MemTxResult result;
        bool need_unlock;
        int access_size;
        uint64_t val;

        /**
         * The read/write logic used below is similar to the ones in
         * flatview_read/write_continue()
         */
        need_unlock = prepare_mmio_access(mr);
        access_size = memory_access_size(mr, size, offset);

        if (!is_write) {
            result = memory_region_dispatch_read(mr, offset, &val,
                                                 size_memop(access_size),
                                                 MEMTXATTRS_UNSPECIFIED);
            stn_he_p(cursor, access_size, val);
        } else {
            val = ldn_he_p(cursor, access_size);
            result = memory_region_dispatch_write(mr, offset, val,
                                                  size_memop(access_size),
                                                  MEMTXATTRS_UNSPECIFIED);
        }

        if (need_unlock) {
            bql_unlock();
        }

        if (result != MEMTX_OK) {
            return -1;
        }

        offset += access_size;
        cursor += access_size;
        size -= access_size;
    }

    return 0;
}
4183123f93dSJagannathan Raman
vfu_object_bar_rw(PCIDevice * pci_dev,int pci_bar,hwaddr bar_offset,char * const buf,hwaddr len,const bool is_write)4193123f93dSJagannathan Raman static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
4203123f93dSJagannathan Raman hwaddr bar_offset, char * const buf,
4213123f93dSJagannathan Raman hwaddr len, const bool is_write)
4223123f93dSJagannathan Raman {
4233123f93dSJagannathan Raman MemoryRegionSection section = { 0 };
4243123f93dSJagannathan Raman uint8_t *ptr = (uint8_t *)buf;
4253123f93dSJagannathan Raman MemoryRegion *section_mr = NULL;
4263123f93dSJagannathan Raman uint64_t section_size;
4273123f93dSJagannathan Raman hwaddr section_offset;
4283123f93dSJagannathan Raman hwaddr size = 0;
4293123f93dSJagannathan Raman
4303123f93dSJagannathan Raman while (len) {
4313123f93dSJagannathan Raman section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
4323123f93dSJagannathan Raman bar_offset, len);
4333123f93dSJagannathan Raman
4343123f93dSJagannathan Raman if (!section.mr) {
4353123f93dSJagannathan Raman warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
4363123f93dSJagannathan Raman return size;
4373123f93dSJagannathan Raman }
4383123f93dSJagannathan Raman
4393123f93dSJagannathan Raman section_mr = section.mr;
4403123f93dSJagannathan Raman section_offset = section.offset_within_region;
4413123f93dSJagannathan Raman section_size = int128_get64(section.size);
4423123f93dSJagannathan Raman
4433123f93dSJagannathan Raman if (is_write && section_mr->readonly) {
4443123f93dSJagannathan Raman warn_report("vfu: attempting to write to readonly region in "
4453123f93dSJagannathan Raman "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
4463123f93dSJagannathan Raman pci_bar, bar_offset,
4473123f93dSJagannathan Raman (bar_offset + section_size));
4483123f93dSJagannathan Raman memory_region_unref(section_mr);
4493123f93dSJagannathan Raman return size;
4503123f93dSJagannathan Raman }
4513123f93dSJagannathan Raman
4523123f93dSJagannathan Raman if (vfu_object_mr_rw(section_mr, ptr, section_offset,
4533123f93dSJagannathan Raman section_size, is_write)) {
4543123f93dSJagannathan Raman warn_report("vfu: failed to %s "
4553123f93dSJagannathan Raman "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
4563123f93dSJagannathan Raman is_write ? "write to" : "read from", bar_offset,
4573123f93dSJagannathan Raman (bar_offset + section_size), pci_bar);
4583123f93dSJagannathan Raman memory_region_unref(section_mr);
4593123f93dSJagannathan Raman return size;
4603123f93dSJagannathan Raman }
4613123f93dSJagannathan Raman
4623123f93dSJagannathan Raman size += section_size;
4633123f93dSJagannathan Raman bar_offset += section_size;
4643123f93dSJagannathan Raman ptr += section_size;
4653123f93dSJagannathan Raman len -= section_size;
4663123f93dSJagannathan Raman
4673123f93dSJagannathan Raman memory_region_unref(section_mr);
4683123f93dSJagannathan Raman }
4693123f93dSJagannathan Raman
4703123f93dSJagannathan Raman return size;
4713123f93dSJagannathan Raman }
4723123f93dSJagannathan Raman
4733123f93dSJagannathan Raman /**
4743123f93dSJagannathan Raman * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
4753123f93dSJagannathan Raman *
4763123f93dSJagannathan Raman * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
4773123f93dSJagannathan Raman * define vfu_object_bar2_handler
4783123f93dSJagannathan Raman */
4793123f93dSJagannathan Raman #define VFU_OBJECT_BAR_HANDLER(BAR_NO) \
4803123f93dSJagannathan Raman static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \
4813123f93dSJagannathan Raman char * const buf, size_t count, \
4823123f93dSJagannathan Raman loff_t offset, const bool is_write) \
4833123f93dSJagannathan Raman { \
4843123f93dSJagannathan Raman VfuObject *o = vfu_get_private(vfu_ctx); \
4853123f93dSJagannathan Raman PCIDevice *pci_dev = o->pci_dev; \
4863123f93dSJagannathan Raman \
4873123f93dSJagannathan Raman return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \
4883123f93dSJagannathan Raman buf, count, is_write); \
4893123f93dSJagannathan Raman } \
4903123f93dSJagannathan Raman
4913123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(0)
4923123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(1)
4933123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(2)
4943123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(3)
4953123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(4)
4963123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(5)
4973123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(6)
4983123f93dSJagannathan Raman
4993123f93dSJagannathan Raman static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
5003123f93dSJagannathan Raman &vfu_object_bar0_handler,
5013123f93dSJagannathan Raman &vfu_object_bar1_handler,
5023123f93dSJagannathan Raman &vfu_object_bar2_handler,
5033123f93dSJagannathan Raman &vfu_object_bar3_handler,
5043123f93dSJagannathan Raman &vfu_object_bar4_handler,
5053123f93dSJagannathan Raman &vfu_object_bar5_handler,
5063123f93dSJagannathan Raman &vfu_object_bar6_handler,
5073123f93dSJagannathan Raman };
5083123f93dSJagannathan Raman
5093123f93dSJagannathan Raman /**
5103123f93dSJagannathan Raman * vfu_object_register_bars - Identify active BAR regions of pdev and setup
5113123f93dSJagannathan Raman * callbacks to handle read/write accesses
5123123f93dSJagannathan Raman */
vfu_object_register_bars(vfu_ctx_t * vfu_ctx,PCIDevice * pdev)5133123f93dSJagannathan Raman static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
5143123f93dSJagannathan Raman {
5153123f93dSJagannathan Raman int flags = VFU_REGION_FLAG_RW;
5163123f93dSJagannathan Raman int i;
5173123f93dSJagannathan Raman
5183123f93dSJagannathan Raman for (i = 0; i < PCI_NUM_REGIONS; i++) {
5193123f93dSJagannathan Raman if (!pdev->io_regions[i].size) {
5203123f93dSJagannathan Raman continue;
5213123f93dSJagannathan Raman }
5223123f93dSJagannathan Raman
5233123f93dSJagannathan Raman if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
5243123f93dSJagannathan Raman pdev->io_regions[i].memory->readonly) {
5253123f93dSJagannathan Raman flags &= ~VFU_REGION_FLAG_WRITE;
5263123f93dSJagannathan Raman }
5273123f93dSJagannathan Raman
5283123f93dSJagannathan Raman vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
5293123f93dSJagannathan Raman (size_t)pdev->io_regions[i].size,
5303123f93dSJagannathan Raman vfu_object_bar_handlers[i],
5313123f93dSJagannathan Raman flags, NULL, 0, -1, 0);
5323123f93dSJagannathan Raman
5333123f93dSJagannathan Raman trace_vfu_bar_register(i, pdev->io_regions[i].addr,
5343123f93dSJagannathan Raman pdev->io_regions[i].size);
5353123f93dSJagannathan Raman }
5363123f93dSJagannathan Raman }
5373123f93dSJagannathan Raman
/**
 * vfu_object_map_irq - map a device interrupt to an IRQ number.
 *
 * Returns the bus/device/function number of @pci_dev; @intx is ignored.
 * vfu_object_set_irq() decodes this value again to find the device.
 */
static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
{
    return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), pci_dev->devfn);
}
54508cf3dc6SJagannathan Raman
vfu_object_set_irq(void * opaque,int pirq,int level)54608cf3dc6SJagannathan Raman static void vfu_object_set_irq(void *opaque, int pirq, int level)
54708cf3dc6SJagannathan Raman {
54808cf3dc6SJagannathan Raman PCIBus *pci_bus = opaque;
54908cf3dc6SJagannathan Raman PCIDevice *pci_dev = NULL;
55008cf3dc6SJagannathan Raman vfu_ctx_t *vfu_ctx = NULL;
55108cf3dc6SJagannathan Raman int pci_bus_num, devfn;
55208cf3dc6SJagannathan Raman
55308cf3dc6SJagannathan Raman if (level) {
55408cf3dc6SJagannathan Raman pci_bus_num = PCI_BUS_NUM(pirq);
55508cf3dc6SJagannathan Raman devfn = PCI_BDF_TO_DEVFN(pirq);
55608cf3dc6SJagannathan Raman
55708cf3dc6SJagannathan Raman /*
55808cf3dc6SJagannathan Raman * pci_find_device() performs at O(1) if the device is attached
55908cf3dc6SJagannathan Raman * to the root PCI bus. Whereas, if the device is attached to a
56008cf3dc6SJagannathan Raman * secondary PCI bus (such as when a root port is involved),
56108cf3dc6SJagannathan Raman * finding the parent PCI bus could take O(n)
56208cf3dc6SJagannathan Raman */
56308cf3dc6SJagannathan Raman pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
56408cf3dc6SJagannathan Raman
56508cf3dc6SJagannathan Raman vfu_ctx = pci_dev->irq_opaque;
56608cf3dc6SJagannathan Raman
56708cf3dc6SJagannathan Raman g_assert(vfu_ctx);
56808cf3dc6SJagannathan Raman
56908cf3dc6SJagannathan Raman vfu_irq_trigger(vfu_ctx, 0);
57008cf3dc6SJagannathan Raman }
57108cf3dc6SJagannathan Raman }
57208cf3dc6SJagannathan Raman
vfu_object_msi_prepare_msg(PCIDevice * pci_dev,unsigned int vector)57308cf3dc6SJagannathan Raman static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
57408cf3dc6SJagannathan Raman unsigned int vector)
57508cf3dc6SJagannathan Raman {
57608cf3dc6SJagannathan Raman MSIMessage msg;
57708cf3dc6SJagannathan Raman
57808cf3dc6SJagannathan Raman msg.address = 0;
57908cf3dc6SJagannathan Raman msg.data = vector;
58008cf3dc6SJagannathan Raman
58108cf3dc6SJagannathan Raman return msg;
58208cf3dc6SJagannathan Raman }
58308cf3dc6SJagannathan Raman
/**
 * vfu_object_msi_trigger - deliver an MSI/MSI-X message to the client.
 *
 * Triggers interrupt number msg.data on the vfu context stored in the
 * device's irq_opaque field.
 */
static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
{
    vfu_irq_trigger(pci_dev->irq_opaque, msg.data);
}
59008cf3dc6SJagannathan Raman
/*
 * Hook the MSI/MSI-X callbacks of o->pci_dev.
 *
 * The device's current callbacks are saved in @o so that
 * vfu_object_restore_msi_cbs() can put them back later.
 */
static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    /* Save what the device had before we touch anything */
    o->default_msi_trigger = pdev->msi_trigger;
    o->default_msi_prepare_message = pdev->msi_prepare_message;
    o->default_msix_prepare_message = pdev->msix_prepare_message;

    /* MSI and MSI-X share the same message preparation routine */
    pdev->msi_trigger = vfu_object_msi_trigger;
    pdev->msi_prepare_message = vfu_object_msi_prepare_msg;
    pdev->msix_prepare_message = vfu_object_msi_prepare_msg;
}
60108cf3dc6SJagannathan Raman
/*
 * Undo vfu_object_setup_msi_cbs(): give o->pci_dev its saved MSI/MSI-X
 * callbacks back.
 *
 * A callback is restored only if the device currently points at the
 * hook installed by this file. This makes the function safe to call
 * when vfu_object_setup_msi_cbs() never ran (e.g. from an early error
 * path, where the saved pointers are still unset): in that case the
 * device's own callbacks are left untouched instead of being
 * overwritten.
 */
static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    if (pdev->msi_trigger == vfu_object_msi_trigger) {
        pdev->msi_trigger = o->default_msi_trigger;
    }

    if (pdev->msi_prepare_message == vfu_object_msi_prepare_msg) {
        pdev->msi_prepare_message = o->default_msi_prepare_message;
    }

    if (pdev->msix_prepare_message == vfu_object_msi_prepare_msg) {
        pdev->msix_prepare_message = o->default_msix_prepare_message;
    }
}
60808cf3dc6SJagannathan Raman
/*
 * MSI-X IRQ state callback registered with libvfio-user.
 *
 * Applies @mask to @count consecutive MSI-X vectors of the PCI device,
 * beginning at vector @start.
 *
 * @count is a number of vectors, not an end index, so the loop walks
 * [start, start + count). Iterating on an offset keeps the loop bounded
 * by @count regardless of the value of @start.
 */
static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                               uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t i;

    for (i = 0; i < count; i++) {
        msix_set_mask(o->pci_dev, start + i, mask);
    }
}
61908cf3dc6SJagannathan Raman
/*
 * MSI IRQ state callback registered with libvfio-user.
 *
 * Applies @mask to @count consecutive MSI vectors of the PCI device,
 * beginning at vector @start. A failure on one vector is reported via
 * VFU_OBJECT_ERROR() and does not stop the remaining vectors from being
 * processed.
 *
 * @count is a number of vectors, not an end index, so the loop walks
 * [start, start + count).
 */
static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                              uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    Error *err = NULL;
    uint32_t i;

    for (i = 0; i < count; i++) {
        msi_set_mask(o->pci_dev, start + i, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
            /* Reset so the next iteration starts with a clean Error */
            err = NULL;
        }
    }
}
63708cf3dc6SJagannathan Raman
/*
 * Describe the interrupts of @pci_dev to libvfio-user and hook the
 * device's MSI callbacks.
 *
 * One INTx interrupt is always registered. If the device has MSI-X
 * vectors allocated, those are registered together with the MSI-X state
 * callback; otherwise, if it has MSI vectors allocated, those are
 * registered together with the MSI state callback.
 *
 * Every libvfio-user call is checked as soon as it returns, so nothing
 * is registered on top of a failed step. The device is only modified
 * (MSI hooks, irq_opaque) once all libvfio-user calls have succeeded.
 *
 * Returns 0 on success, or the negative value returned by the failing
 * libvfio-user call.
 */
static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            return ret;
        }

        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                           &vfu_msix_irq_state);
        if (ret < 0) {
            return ret;
        }
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            return ret;
        }

        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                           &vfu_msi_irq_state);
        if (ret < 0) {
            return ret;
        }
    }

    vfu_object_setup_msi_cbs(o);

    /* Lets the IRQ handlers in this file find the context from the device */
    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}
67008cf3dc6SJagannathan Raman
/*
 * Install this file's IRQ handlers on @pci_bus.
 *
 * vfu_object_set_irq is registered through pci_bus_irqs() with the bus
 * itself as the opaque pointer, and vfu_object_map_irq through
 * pci_bus_map_irqs(). The last argument given to pci_bus_irqs() is the
 * BDF built from the bus number and the highest devfn
 * (PCI_DEVFN_MAX - 1).
 */
void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
    int highest_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), PCI_DEVFN_MAX - 1);

    pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, highest_bdf);
    pci_bus_map_irqs(pci_bus, vfu_object_map_irq);
}
67908cf3dc6SJagannathan Raman
/*
 * Device reset callback registered with libvfio-user.
 *
 * Cold-resets the PCI device for every reset type except
 * VFU_RESET_LOST_CONN, which is left to vfu_object_ctx_run().
 *
 * Always returns 0.
 */
static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
{
    VfuObject *o = vfu_get_private(vfu_ctx);

    /* A lost connection is handled by vfu_object_ctx_run(), not here */
    if (type != VFU_RESET_LOST_CONN) {
        device_cold_reset(DEVICE(o->pci_dev));
    }

    return 0;
}
69378e27dfaSJagannathan Raman
69487f7249fSJagannathan Raman /*
69587f7249fSJagannathan Raman * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
69687f7249fSJagannathan Raman * properties. It also depends on devices instantiated in QEMU. These
69787f7249fSJagannathan Raman * dependencies are not available during the instance_init phase of this
69887f7249fSJagannathan Raman * object's life-cycle. As such, the server is initialized after the
69987f7249fSJagannathan Raman * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
70087f7249fSJagannathan Raman * when the machine is setup, and the dependencies are available.
70187f7249fSJagannathan Raman */
vfu_object_machine_done(Notifier * notifier,void * data)70287f7249fSJagannathan Raman static void vfu_object_machine_done(Notifier *notifier, void *data)
70387f7249fSJagannathan Raman {
70487f7249fSJagannathan Raman VfuObject *o = container_of(notifier, VfuObject, machine_done);
70587f7249fSJagannathan Raman Error *err = NULL;
70687f7249fSJagannathan Raman
70787f7249fSJagannathan Raman vfu_object_init_ctx(o, &err);
70887f7249fSJagannathan Raman
70987f7249fSJagannathan Raman if (err) {
71087f7249fSJagannathan Raman error_propagate(&error_abort, err);
71187f7249fSJagannathan Raman }
71287f7249fSJagannathan Raman }
71387f7249fSJagannathan Raman
7149fb3fba1SJagannathan Raman /**
7159fb3fba1SJagannathan Raman * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
7169fb3fba1SJagannathan Raman * an unplug blocker for the associated PCI device. Setup a FD handler
7179fb3fba1SJagannathan Raman * to process incoming messages in the context's socket.
7189fb3fba1SJagannathan Raman *
7199fb3fba1SJagannathan Raman * The socket and device properties are mandatory, and this function
7209fb3fba1SJagannathan Raman * will not create the context without them - the setters for these
7219fb3fba1SJagannathan Raman * properties should call this function when the property is set. The
7229fb3fba1SJagannathan Raman * machine should also be ready when this function is invoked - it is
7239fb3fba1SJagannathan Raman * because QEMU objects are initialized before devices, and the
7249fb3fba1SJagannathan Raman * associated PCI device wouldn't be available at the object
7259fb3fba1SJagannathan Raman * initialization time. Until these conditions are satisfied, this
7269fb3fba1SJagannathan Raman * function would return early without performing any task.
7279fb3fba1SJagannathan Raman */
vfu_object_init_ctx(VfuObject * o,Error ** errp)72887f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp)
72987f7249fSJagannathan Raman {
730a6e8d6d9SJagannathan Raman DeviceState *dev = NULL;
731a6e8d6d9SJagannathan Raman vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
732a6e8d6d9SJagannathan Raman int ret;
73387f7249fSJagannathan Raman
73487f7249fSJagannathan Raman if (o->vfu_ctx || !o->socket || !o->device ||
73587f7249fSJagannathan Raman !phase_check(PHASE_MACHINE_READY)) {
73687f7249fSJagannathan Raman return;
73787f7249fSJagannathan Raman }
73887f7249fSJagannathan Raman
73987f7249fSJagannathan Raman if (o->err) {
74087f7249fSJagannathan Raman error_propagate(errp, o->err);
74187f7249fSJagannathan Raman o->err = NULL;
74287f7249fSJagannathan Raman return;
74387f7249fSJagannathan Raman }
74487f7249fSJagannathan Raman
7459fb3fba1SJagannathan Raman o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
7469fb3fba1SJagannathan Raman LIBVFIO_USER_FLAG_ATTACH_NB,
74787f7249fSJagannathan Raman o, VFU_DEV_TYPE_PCI);
74887f7249fSJagannathan Raman if (o->vfu_ctx == NULL) {
74987f7249fSJagannathan Raman error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
75087f7249fSJagannathan Raman return;
75187f7249fSJagannathan Raman }
752a6e8d6d9SJagannathan Raman
753a6e8d6d9SJagannathan Raman dev = qdev_find_recursive(sysbus_get_default(), o->device);
754a6e8d6d9SJagannathan Raman if (dev == NULL) {
755a6e8d6d9SJagannathan Raman error_setg(errp, "vfu: Device %s not found", o->device);
756a6e8d6d9SJagannathan Raman goto fail;
757a6e8d6d9SJagannathan Raman }
758a6e8d6d9SJagannathan Raman
759a6e8d6d9SJagannathan Raman if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
760a6e8d6d9SJagannathan Raman error_setg(errp, "vfu: %s not a PCI device", o->device);
761a6e8d6d9SJagannathan Raman goto fail;
762a6e8d6d9SJagannathan Raman }
763a6e8d6d9SJagannathan Raman
764a6e8d6d9SJagannathan Raman o->pci_dev = PCI_DEVICE(dev);
765a6e8d6d9SJagannathan Raman
766a6e8d6d9SJagannathan Raman object_ref(OBJECT(o->pci_dev));
767a6e8d6d9SJagannathan Raman
768a6e8d6d9SJagannathan Raman if (pci_is_express(o->pci_dev)) {
769a6e8d6d9SJagannathan Raman pci_type = VFU_PCI_TYPE_EXPRESS;
770a6e8d6d9SJagannathan Raman }
771a6e8d6d9SJagannathan Raman
772a6e8d6d9SJagannathan Raman ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
773a6e8d6d9SJagannathan Raman if (ret < 0) {
774a6e8d6d9SJagannathan Raman error_setg(errp,
775a6e8d6d9SJagannathan Raman "vfu: Failed to attach PCI device %s to context - %s",
776a6e8d6d9SJagannathan Raman o->device, strerror(errno));
777a6e8d6d9SJagannathan Raman goto fail;
778a6e8d6d9SJagannathan Raman }
779a6e8d6d9SJagannathan Raman
780a6e8d6d9SJagannathan Raman error_setg(&o->unplug_blocker,
781a6e8d6d9SJagannathan Raman "vfu: %s for %s must be deleted before unplugging",
782a6e8d6d9SJagannathan Raman TYPE_VFU_OBJECT, o->device);
783a6e8d6d9SJagannathan Raman qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
784a6e8d6d9SJagannathan Raman
78590072f29SJagannathan Raman ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
78690072f29SJagannathan Raman pci_config_size(o->pci_dev), &vfu_object_cfg_access,
78790072f29SJagannathan Raman VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
78890072f29SJagannathan Raman NULL, 0, -1, 0);
78990072f29SJagannathan Raman if (ret < 0) {
79090072f29SJagannathan Raman error_setg(errp,
79190072f29SJagannathan Raman "vfu: Failed to setup config space handlers for %s- %s",
79290072f29SJagannathan Raman o->device, strerror(errno));
79390072f29SJagannathan Raman goto fail;
79490072f29SJagannathan Raman }
79590072f29SJagannathan Raman
79615ccf9beSJagannathan Raman ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
79715ccf9beSJagannathan Raman if (ret < 0) {
79815ccf9beSJagannathan Raman error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
79915ccf9beSJagannathan Raman o->device);
80015ccf9beSJagannathan Raman goto fail;
80115ccf9beSJagannathan Raman }
80215ccf9beSJagannathan Raman
8033123f93dSJagannathan Raman vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
8043123f93dSJagannathan Raman
80508cf3dc6SJagannathan Raman ret = vfu_object_setup_irqs(o, o->pci_dev);
80608cf3dc6SJagannathan Raman if (ret < 0) {
80708cf3dc6SJagannathan Raman error_setg(errp, "vfu: Failed to setup interrupts for %s",
80808cf3dc6SJagannathan Raman o->device);
80908cf3dc6SJagannathan Raman goto fail;
81008cf3dc6SJagannathan Raman }
81108cf3dc6SJagannathan Raman
81278e27dfaSJagannathan Raman ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
81378e27dfaSJagannathan Raman if (ret < 0) {
81478e27dfaSJagannathan Raman error_setg(errp, "vfu: Failed to setup reset callback");
81578e27dfaSJagannathan Raman goto fail;
81678e27dfaSJagannathan Raman }
81778e27dfaSJagannathan Raman
8189fb3fba1SJagannathan Raman ret = vfu_realize_ctx(o->vfu_ctx);
8199fb3fba1SJagannathan Raman if (ret < 0) {
8209fb3fba1SJagannathan Raman error_setg(errp, "vfu: Failed to realize device %s- %s",
8219fb3fba1SJagannathan Raman o->device, strerror(errno));
8229fb3fba1SJagannathan Raman goto fail;
8239fb3fba1SJagannathan Raman }
8249fb3fba1SJagannathan Raman
8259fb3fba1SJagannathan Raman o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
8269fb3fba1SJagannathan Raman if (o->vfu_poll_fd < 0) {
8279fb3fba1SJagannathan Raman error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
8289fb3fba1SJagannathan Raman goto fail;
8299fb3fba1SJagannathan Raman }
8309fb3fba1SJagannathan Raman
8319fb3fba1SJagannathan Raman qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
8329fb3fba1SJagannathan Raman
833a6e8d6d9SJagannathan Raman return;
834a6e8d6d9SJagannathan Raman
835a6e8d6d9SJagannathan Raman fail:
836a6e8d6d9SJagannathan Raman vfu_destroy_ctx(o->vfu_ctx);
837a6e8d6d9SJagannathan Raman if (o->unplug_blocker && o->pci_dev) {
838a6e8d6d9SJagannathan Raman qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
839a6e8d6d9SJagannathan Raman error_free(o->unplug_blocker);
840a6e8d6d9SJagannathan Raman o->unplug_blocker = NULL;
841a6e8d6d9SJagannathan Raman }
842a6e8d6d9SJagannathan Raman if (o->pci_dev) {
84308cf3dc6SJagannathan Raman vfu_object_restore_msi_cbs(o);
84408cf3dc6SJagannathan Raman o->pci_dev->irq_opaque = NULL;
845a6e8d6d9SJagannathan Raman object_unref(OBJECT(o->pci_dev));
846a6e8d6d9SJagannathan Raman o->pci_dev = NULL;
847a6e8d6d9SJagannathan Raman }
848a6e8d6d9SJagannathan Raman o->vfu_ctx = NULL;
8498f9a9259SJagannathan Raman }
8508f9a9259SJagannathan Raman
/*
 * Instance initializer for TYPE_VFU_OBJECT.
 *
 * Counts the new instance in the class-wide nr_devs, marks the poll fd
 * as unset and, when the machine is not ready yet, registers the
 * machine-init-done notifier that will bring up the server later.
 *
 * If the current machine is not TYPE_REMOTE_MACHINE the error is stored
 * in o->err (this function cannot report errors itself) and no notifier
 * is registered.
 */
static void vfu_object_init(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs++;

    /*
     * Must be set before any early return: vfu_object_finalize() treats
     * every value other than -1 as a registered fd, and a
     * zero-initialized field would make it remove the handler of fd 0.
     */
    o->vfu_poll_fd = -1;

    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
        error_setg(&o->err, "vfu: %s only compatible with %s machine",
                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
        return;
    }

    if (!phase_check(PHASE_MACHINE_READY)) {
        o->machine_done.notify = vfu_object_machine_done;
        qemu_add_machine_init_done_notifier(&o->machine_done);
    }
}
8718f9a9259SJagannathan Raman
/*
 * Instance finalizer for TYPE_VFU_OBJECT.
 *
 * Releases everything the object owns: the socket address, the fd
 * handler, the libvfio-user context, the device id string, a pending
 * (never propagated) initialization error, the unplug blocker and the
 * reference on the PCI device, whose MSI callbacks are restored first.
 * The machine-init-done notifier is removed if it was registered.
 *
 * When this was the last instance (nr_devs drops to 0) and
 * vfu_object_auto_shutdown() is true, a shutdown of QEMU is requested.
 */
static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs--;

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    /* Stop polling before the context that owns the fd goes away */
    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);

    o->device = NULL;

    /*
     * o->err is still owned by the object if vfu_object_init_ctx() never
     * got to propagate it; free it here so it does not leak.
     */
    error_free(o->err);
    o->err = NULL;

    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }

    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    if (!k->nr_devs && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}
9198f9a9259SJagannathan Raman
/*
 * Class initializer for TYPE_VFU_OBJECT.
 *
 * Resets the class-wide instance counter and registers the two
 * properties of the object, both of which only have a setter:
 *   "socket" - a SocketAddress, set through vfu_object_set_socket
 *   "device" - a string, set through vfu_object_set_device
 */
static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *vfu_class = VFU_OBJECT_CLASS(klass);

    vfu_class->nr_devs = 0;

    /* "socket" property */
    object_class_property_add(klass, "socket", "SocketAddress",
                              NULL, vfu_object_set_socket,
                              NULL, NULL);
    object_class_property_set_description(klass, "socket",
        "SocketAddress (ex: type=unix,path=/tmp/sock). "
        "Only UNIX is presently supported");

    /* "device" property */
    object_class_property_add_str(klass, "device",
                                  NULL, vfu_object_set_device);
    object_class_property_set_description(klass, "device",
        "device ID - only PCI devices are presently supported");
}
9388f9a9259SJagannathan Raman
9398f9a9259SJagannathan Raman static const TypeInfo vfu_object_info = {
9408f9a9259SJagannathan Raman .name = TYPE_VFU_OBJECT,
9418f9a9259SJagannathan Raman .parent = TYPE_OBJECT,
9428f9a9259SJagannathan Raman .instance_size = sizeof(VfuObject),
9438f9a9259SJagannathan Raman .instance_init = vfu_object_init,
9448f9a9259SJagannathan Raman .instance_finalize = vfu_object_finalize,
9458f9a9259SJagannathan Raman .class_size = sizeof(VfuObjectClass),
9468f9a9259SJagannathan Raman .class_init = vfu_object_class_init,
9478f9a9259SJagannathan Raman .interfaces = (InterfaceInfo[]) {
9488f9a9259SJagannathan Raman { TYPE_USER_CREATABLE },
9498f9a9259SJagannathan Raman { }
9508f9a9259SJagannathan Raman }
9518f9a9259SJagannathan Raman };
9528f9a9259SJagannathan Raman
vfu_register_types(void)9538f9a9259SJagannathan Raman static void vfu_register_types(void)
9548f9a9259SJagannathan Raman {
9558f9a9259SJagannathan Raman type_register_static(&vfu_object_info);
9568f9a9259SJagannathan Raman }
9578f9a9259SJagannathan Raman
9588f9a9259SJagannathan Raman type_init(vfu_register_types);
959