/*
 * Container for vfio-user IOMMU type: rather than communicating with the kernel
 * vfio driver, we communicate over a socket to a server using the vfio-user
 * protocol.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
89fca2b7dSJohn Levon
99fca2b7dSJohn Levon #include <sys/ioctl.h>
109fca2b7dSJohn Levon #include <linux/vfio.h>
119fca2b7dSJohn Levon #include "qemu/osdep.h"
129fca2b7dSJohn Levon
139fca2b7dSJohn Levon #include "hw/vfio-user/container.h"
143bdb738bSJohn Levon #include "hw/vfio-user/device.h"
1518e899e6SJohn Levon #include "hw/vfio-user/trace.h"
169fca2b7dSJohn Levon #include "hw/vfio/vfio-device.h"
179fca2b7dSJohn Levon #include "hw/vfio/vfio-listener.h"
189fca2b7dSJohn Levon #include "qapi/error.h"
199fca2b7dSJohn Levon
2018e899e6SJohn Levon /*
2118e899e6SJohn Levon * When DMA space is the physical address space, the region add/del listeners
2218e899e6SJohn Levon * will fire during memory update transactions. These depend on BQL being held,
2318e899e6SJohn Levon * so do any resulting map/demap ops async while keeping BQL.
2418e899e6SJohn Levon */
vfio_user_listener_begin(VFIOContainerBase * bcontainer)2518e899e6SJohn Levon static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
2618e899e6SJohn Levon {
2718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
2818e899e6SJohn Levon bcontainer);
2918e899e6SJohn Levon
3018e899e6SJohn Levon container->proxy->async_ops = true;
3118e899e6SJohn Levon }
3218e899e6SJohn Levon
/*
 * vfio_user_listener_commit: end of a memory transaction.
 *
 * Switches the proxy back to synchronous mode and then waits for the
 * async requests that were sent while the transaction was open.
 */
static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *uc;

    uc = container_of(bcontainer, VFIOUserContainer, bcontainer);

    /* Leave async mode first, then drain whatever was queued meanwhile. */
    uc->proxy->async_ops = false;
    vfio_user_wait_reqs(uc->proxy);
}
4218e899e6SJohn Levon
/*
 * vfio_user_dma_unmap: send a VFIO_USER_DMA_UNMAP request to the server.
 *
 * @bcontainer: base container embedded in a VFIOUserContainer
 * @iova:       start of the range to unmap
 * @size:       length of the range to unmap
 * @iotlb:      not used by this implementation
 * @unmap_all:  when true, VFIO_DMA_UNMAP_FLAG_ALL is set in the request
 *
 * While the proxy is in async mode the request is sent without waiting for
 * a reply, and the message buffer is not freed here. Otherwise the call
 * blocks for the reply and the buffer is released before returning.
 *
 * Returns 0 on success, -EFAULT if sending failed, or (synchronous path
 * only) the negated error code from a reply flagged VFIO_USER_ERROR.
 */
static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
                               hwaddr iova, ram_addr_t size,
                               IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    Error *local_err = NULL;
    VFIOUserDMAUnmap *req = g_malloc(sizeof(*req));
    int ret;

    vfio_user_request_msg(&req->hdr, VFIO_USER_DMA_UNMAP, sizeof(*req), 0);
    req->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
    if (unmap_all) {
        req->flags = VFIO_DMA_UNMAP_FLAG_ALL;
    } else {
        req->flags = 0;
    }
    req->iova = iova;
    req->size = size;

    trace_vfio_user_dma_unmap(req->iova, req->size, req->flags,
                              container->proxy->async_ops);

    if (container->proxy->async_ops) {
        /* Fire and forget: req is deliberately not freed on this path. */
        if (vfio_user_send_nowait(container->proxy, &req->hdr, NULL,
                                  0, &local_err)) {
            return 0;
        }
        error_report_err(local_err);
        return -EFAULT;
    }

    ret = 0;
    if (!vfio_user_send_wait(container->proxy, &req->hdr, NULL,
                             0, &local_err)) {
        error_report_err(local_err);
        ret = -EFAULT;
    }

    /* An error reported in the reply header takes precedence. */
    if (req->hdr.flags & VFIO_USER_ERROR) {
        ret = -req->hdr.error_reply;
    }

    g_free(req);
    return ret;
}
849fca2b7dSJohn Levon
/*
 * vfio_user_dma_map: send a VFIO_USER_DMA_MAP request to the server.
 *
 * @bcontainer: base container embedded in a VFIOUserContainer
 * @iova:       start of the range to map
 * @size:       length of the range to map
 * @vaddr:      QEMU process address backing the range
 * @readonly:   when false, VFIO_DMA_MAP_FLAG_WRITE is added to the request
 * @mrp:        memory region backing the range
 *
 * If the memory region has a file descriptor, that descriptor is sent along
 * with the request and @vaddr is converted to an offset via
 * qemu_ram_block_host_offset(); otherwise the offset is left as 0 and no
 * descriptor is sent.
 *
 * Returns 0 on success, -EFAULT if sending failed, or (synchronous path
 * only) the negated error code from a reply flagged VFIO_USER_ERROR.
 */
static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly,
                             MemoryRegion *mrp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    int fd = memory_region_get_fd(mrp);
    Error *local_err = NULL;
    VFIOUserFDs *fds = NULL;
    VFIOUserDMAMap *req = g_malloc0(sizeof(*req));
    int ret = 0;

    vfio_user_request_msg(&req->hdr, VFIO_USER_DMA_MAP, sizeof(*req), 0);
    req->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    req->iova = iova;
    req->size = size;

    /*
     * vaddr enters as a QEMU process address; make it either a file offset
     * for mapped areas or leave as 0.
     */
    req->offset = 0;
    if (fd != -1) {
        req->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
    }

    /* Always readable; writable unless the caller asked for read-only. */
    req->flags = VFIO_DMA_MAP_FLAG_READ;
    if (!readonly) {
        req->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    trace_vfio_user_dma_map(req->iova, req->size, req->offset, req->flags,
                            container->proxy->async_ops);

    if (container->proxy->async_ops) {
        /*
         * Async requests are sent without blocking and are later waited
         * for in vfio_user_wait_reqs(). The fd holder cannot be an
         * automatic variable because this function returns before the
         * request completes; req is likewise not freed on this path.
         */
        if (fd != -1) {
            fds = vfio_user_getfds(1);
            fds->send_fds = 1;
            fds->fds[0] = fd;
        }

        if (vfio_user_send_nowait(container->proxy, &req->hdr, fds,
                                  0, &local_err)) {
            return 0;
        }
        error_report_err(local_err);
        return -EFAULT;
    }

    /* Synchronous path: a stack-allocated fd holder is sufficient. */
    VFIOUserFDs local_fds = { 1, 0, &fd };

    if (fd != -1) {
        fds = &local_fds;
    }

    if (!vfio_user_send_wait(container->proxy, &req->hdr, fds,
                             0, &local_err)) {
        error_report_err(local_err);
        ret = -EFAULT;
    }

    /* An error reported in the reply header takes precedence. */
    if (req->hdr.flags & VFIO_USER_ERROR) {
        ret = -req->hdr.error_reply;
    }

    g_free(req);
    return ret;
}
1579fca2b7dSJohn Levon
1589fca2b7dSJohn Levon static int
vfio_user_set_dirty_page_tracking(const VFIOContainerBase * bcontainer,bool start,Error ** errp)1599fca2b7dSJohn Levon vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
1609fca2b7dSJohn Levon bool start, Error **errp)
1619fca2b7dSJohn Levon {
1629fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported");
1639fca2b7dSJohn Levon return -ENOTSUP;
1649fca2b7dSJohn Levon }
1659fca2b7dSJohn Levon
vfio_user_query_dirty_bitmap(const VFIOContainerBase * bcontainer,VFIOBitmap * vbmap,hwaddr iova,hwaddr size,Error ** errp)1669fca2b7dSJohn Levon static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
1679fca2b7dSJohn Levon VFIOBitmap *vbmap, hwaddr iova,
1689fca2b7dSJohn Levon hwaddr size, Error **errp)
1699fca2b7dSJohn Levon {
1709fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported");
1719fca2b7dSJohn Levon return -ENOTSUP;
1729fca2b7dSJohn Levon }
1739fca2b7dSJohn Levon
vfio_user_setup(VFIOContainerBase * bcontainer,Error ** errp)1749fca2b7dSJohn Levon static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
1759fca2b7dSJohn Levon {
17652ce9c35SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
17752ce9c35SJohn Levon bcontainer);
17852ce9c35SJohn Levon
17952ce9c35SJohn Levon assert(container->proxy->dma_pgsizes != 0);
18052ce9c35SJohn Levon bcontainer->pgsizes = container->proxy->dma_pgsizes;
18152ce9c35SJohn Levon bcontainer->dma_max_mappings = container->proxy->max_dma;
18252ce9c35SJohn Levon
18352ce9c35SJohn Levon /* No live migration support yet. */
18452ce9c35SJohn Levon bcontainer->dirty_pages_supported = false;
18552ce9c35SJohn Levon bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
18652ce9c35SJohn Levon bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
18752ce9c35SJohn Levon
18852ce9c35SJohn Levon return true;
1899fca2b7dSJohn Levon }
1909fca2b7dSJohn Levon
vfio_user_create_container(VFIODevice * vbasedev,Error ** errp)19152ce9c35SJohn Levon static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
19252ce9c35SJohn Levon Error **errp)
1939fca2b7dSJohn Levon {
1949fca2b7dSJohn Levon VFIOUserContainer *container;
1959fca2b7dSJohn Levon
1969fca2b7dSJohn Levon container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
19752ce9c35SJohn Levon container->proxy = vbasedev->proxy;
1989fca2b7dSJohn Levon return container;
1999fca2b7dSJohn Levon }
2009fca2b7dSJohn Levon
2019fca2b7dSJohn Levon /*
2029fca2b7dSJohn Levon * Try to mirror vfio_container_connect() as much as possible.
2039fca2b7dSJohn Levon */
2049fca2b7dSJohn Levon static VFIOUserContainer *
vfio_user_container_connect(AddressSpace * as,VFIODevice * vbasedev,Error ** errp)20552ce9c35SJohn Levon vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
20652ce9c35SJohn Levon Error **errp)
2079fca2b7dSJohn Levon {
2089fca2b7dSJohn Levon VFIOContainerBase *bcontainer;
2099fca2b7dSJohn Levon VFIOUserContainer *container;
2109fca2b7dSJohn Levon VFIOAddressSpace *space;
2119fca2b7dSJohn Levon VFIOIOMMUClass *vioc;
21252ce9c35SJohn Levon int ret;
2139fca2b7dSJohn Levon
2149fca2b7dSJohn Levon space = vfio_address_space_get(as);
2159fca2b7dSJohn Levon
21652ce9c35SJohn Levon container = vfio_user_create_container(vbasedev, errp);
2179fca2b7dSJohn Levon if (!container) {
2189fca2b7dSJohn Levon goto put_space_exit;
2199fca2b7dSJohn Levon }
2209fca2b7dSJohn Levon
2219fca2b7dSJohn Levon bcontainer = &container->bcontainer;
2229fca2b7dSJohn Levon
22352ce9c35SJohn Levon ret = ram_block_uncoordinated_discard_disable(true);
22452ce9c35SJohn Levon if (ret) {
22552ce9c35SJohn Levon error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
226f7c5dff2SMark Cave-Ayland goto free_container_exit;
22752ce9c35SJohn Levon }
22852ce9c35SJohn Levon
2299fca2b7dSJohn Levon vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
2309fca2b7dSJohn Levon assert(vioc->setup);
2319fca2b7dSJohn Levon
2329fca2b7dSJohn Levon if (!vioc->setup(bcontainer, errp)) {
23352ce9c35SJohn Levon goto enable_discards_exit;
2349fca2b7dSJohn Levon }
2359fca2b7dSJohn Levon
2369fca2b7dSJohn Levon vfio_address_space_insert(space, bcontainer);
2379fca2b7dSJohn Levon
2389fca2b7dSJohn Levon if (!vfio_listener_register(bcontainer, errp)) {
2399fca2b7dSJohn Levon goto listener_release_exit;
2409fca2b7dSJohn Levon }
2419fca2b7dSJohn Levon
2429fca2b7dSJohn Levon bcontainer->initialized = true;
2439fca2b7dSJohn Levon
2449fca2b7dSJohn Levon return container;
2459fca2b7dSJohn Levon
2469fca2b7dSJohn Levon listener_release_exit:
2479fca2b7dSJohn Levon vfio_listener_unregister(bcontainer);
2489fca2b7dSJohn Levon if (vioc->release) {
2499fca2b7dSJohn Levon vioc->release(bcontainer);
2509fca2b7dSJohn Levon }
2519fca2b7dSJohn Levon
25252ce9c35SJohn Levon enable_discards_exit:
25352ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false);
25452ce9c35SJohn Levon
2559fca2b7dSJohn Levon free_container_exit:
2569fca2b7dSJohn Levon object_unref(container);
2579fca2b7dSJohn Levon
2589fca2b7dSJohn Levon put_space_exit:
2599fca2b7dSJohn Levon vfio_address_space_put(space);
2609fca2b7dSJohn Levon
2619fca2b7dSJohn Levon return NULL;
2629fca2b7dSJohn Levon }
2639fca2b7dSJohn Levon
/*
 * vfio_user_container_disconnect: tear down a container set up by
 * vfio_user_container_connect().
 *
 * Re-enables uncoordinated RAM discard, unregisters the listener, calls the
 * class release hook if there is one, drops the container reference and
 * finally releases the address space.
 */
static void vfio_user_container_disconnect(VFIOUserContainer *container)
{
    VFIOContainerBase *base = &container->bcontainer;
    VFIOIOMMUClass *klass = VFIO_IOMMU_GET_CLASS(base);
    /* Fetched up front: the container may be gone after object_unref(). */
    VFIOAddressSpace *as_space = base->space;

    ram_block_uncoordinated_discard_disable(false);

    vfio_listener_unregister(base);
    if (klass->release != NULL) {
        klass->release(base);
    }

    object_unref(container);

    vfio_address_space_put(as_space);
}
2819fca2b7dSJohn Levon
vfio_user_device_get(VFIOUserContainer * container,VFIODevice * vbasedev,Error ** errp)2829fca2b7dSJohn Levon static bool vfio_user_device_get(VFIOUserContainer *container,
2839fca2b7dSJohn Levon VFIODevice *vbasedev, Error **errp)
2849fca2b7dSJohn Levon {
2853bdb738bSJohn Levon struct vfio_device_info info = { .argsz = sizeof(info) };
2863bdb738bSJohn Levon
2873bdb738bSJohn Levon
2883bdb738bSJohn Levon if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
2893bdb738bSJohn Levon return false;
2903bdb738bSJohn Levon }
2919fca2b7dSJohn Levon
2929fca2b7dSJohn Levon vbasedev->fd = -1;
2939fca2b7dSJohn Levon
2949fca2b7dSJohn Levon vfio_device_prepare(vbasedev, &container->bcontainer, &info);
2959fca2b7dSJohn Levon
2969fca2b7dSJohn Levon return true;
2979fca2b7dSJohn Levon }
2989fca2b7dSJohn Levon
2999fca2b7dSJohn Levon /*
3009fca2b7dSJohn Levon * vfio_user_device_attach: attach a device to a new container.
3019fca2b7dSJohn Levon */
vfio_user_device_attach(const char * name,VFIODevice * vbasedev,AddressSpace * as,Error ** errp)3029fca2b7dSJohn Levon static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
3039fca2b7dSJohn Levon AddressSpace *as, Error **errp)
3049fca2b7dSJohn Levon {
3059fca2b7dSJohn Levon VFIOUserContainer *container;
3069fca2b7dSJohn Levon
30752ce9c35SJohn Levon container = vfio_user_container_connect(as, vbasedev, errp);
3089fca2b7dSJohn Levon if (container == NULL) {
3099fca2b7dSJohn Levon error_prepend(errp, "failed to connect proxy");
3109fca2b7dSJohn Levon return false;
3119fca2b7dSJohn Levon }
3129fca2b7dSJohn Levon
3139fca2b7dSJohn Levon return vfio_user_device_get(container, vbasedev, errp);
3149fca2b7dSJohn Levon }
3159fca2b7dSJohn Levon
/*
 * vfio_user_device_detach: undo vfio_user_device_attach().
 *
 * The owning container is looked up before the device is unprepared, then
 * the container is disconnected.
 */
static void vfio_user_device_detach(VFIODevice *vbasedev)
{
    VFIOUserContainer *owner;

    /* Resolve the container first, before vfio_device_unprepare() runs. */
    owner = container_of(vbasedev->bcontainer, VFIOUserContainer, bcontainer);

    vfio_device_unprepare(vbasedev);
    vfio_user_container_disconnect(owner);
}
3259fca2b7dSJohn Levon
/*
 * vfio_user_pci_hot_reset: PCI hot reset hook; a no-op for vfio-user.
 *
 * Both arguments are ignored and 0 is always returned.
 */
static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    /* ->needs_reset is always false for vfio-user. */
    const int ok = 0;

    return ok;
}
3319fca2b7dSJohn Levon
/*
 * vfio_iommu_user_class_init: install the vfio-user implementations of the
 * VFIOIOMMUClass hooks.
 *
 * @klass: class being initialised, cast to VFIOIOMMUClass
 * @data:  unused
 */
static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->setup = vfio_user_setup;
    /* Each assignment is its own statement (no accidental comma operator). */
    vioc->listener_begin = vfio_user_listener_begin;
    vioc->listener_commit = vfio_user_listener_commit;
    vioc->dma_map = vfio_user_dma_map;
    vioc->dma_unmap = vfio_user_dma_unmap;
    vioc->attach_device = vfio_user_device_attach;
    vioc->detach_device = vfio_user_device_detach;
    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
}
3479fca2b7dSJohn Levon
3489fca2b7dSJohn Levon static const TypeInfo types[] = {
3499fca2b7dSJohn Levon {
3509fca2b7dSJohn Levon .name = TYPE_VFIO_IOMMU_USER,
3519fca2b7dSJohn Levon .parent = TYPE_VFIO_IOMMU,
3529fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserContainer),
3539fca2b7dSJohn Levon .class_init = vfio_iommu_user_class_init,
3549fca2b7dSJohn Levon },
3559fca2b7dSJohn Levon };
3569fca2b7dSJohn Levon
3579fca2b7dSJohn Levon DEFINE_TYPES(types)
358