19fca2b7dSJohn Levon /* 29fca2b7dSJohn Levon * Container for vfio-user IOMMU type: rather than communicating with the kernel 39fca2b7dSJohn Levon * vfio driver, we communicate over a socket to a server using the vfio-user 49fca2b7dSJohn Levon * protocol. 59fca2b7dSJohn Levon * 69fca2b7dSJohn Levon * SPDX-License-Identifier: GPL-2.0-or-later 79fca2b7dSJohn Levon */ 89fca2b7dSJohn Levon 99fca2b7dSJohn Levon #include <sys/ioctl.h> 109fca2b7dSJohn Levon #include <linux/vfio.h> 119fca2b7dSJohn Levon #include "qemu/osdep.h" 129fca2b7dSJohn Levon 139fca2b7dSJohn Levon #include "hw/vfio-user/container.h" 143bdb738bSJohn Levon #include "hw/vfio-user/device.h" 1518e899e6SJohn Levon #include "hw/vfio-user/trace.h" 169fca2b7dSJohn Levon #include "hw/vfio/vfio-device.h" 179fca2b7dSJohn Levon #include "hw/vfio/vfio-listener.h" 189fca2b7dSJohn Levon #include "qapi/error.h" 199fca2b7dSJohn Levon 2018e899e6SJohn Levon /* 2118e899e6SJohn Levon * When DMA space is the physical address space, the region add/del listeners 2218e899e6SJohn Levon * will fire during memory update transactions. These depend on BQL being held, 2318e899e6SJohn Levon * so do any resulting map/demap ops async while keeping BQL. 2418e899e6SJohn Levon */ 2518e899e6SJohn Levon static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) 2618e899e6SJohn Levon { 2718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 2818e899e6SJohn Levon bcontainer); 2918e899e6SJohn Levon 3018e899e6SJohn Levon container->proxy->async_ops = true; 3118e899e6SJohn Levon } 3218e899e6SJohn Levon 3318e899e6SJohn Levon static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) 3418e899e6SJohn Levon { 3518e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 3618e899e6SJohn Levon bcontainer); 3718e899e6SJohn Levon 3818e899e6SJohn Levon /* wait here for any async requests sent during the transaction */ 3918e899e6SJohn Levon container->proxy->async_ops = false; 4018e899e6SJohn Levon vfio_user_wait_reqs(container->proxy); 4118e899e6SJohn Levon } 4218e899e6SJohn Levon 439fca2b7dSJohn Levon static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, 449fca2b7dSJohn Levon hwaddr iova, ram_addr_t size, 459fca2b7dSJohn Levon IOMMUTLBEntry *iotlb, bool unmap_all) 469fca2b7dSJohn Levon { 4718e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 4818e899e6SJohn Levon bcontainer); 4918e899e6SJohn Levon Error *local_err = NULL; 5018e899e6SJohn Levon int ret = 0; 5118e899e6SJohn Levon 5218e899e6SJohn Levon VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp)); 5318e899e6SJohn Levon 5418e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0); 5518e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap); 5618e899e6SJohn Levon msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0; 5718e899e6SJohn Levon msgp->iova = iova; 5818e899e6SJohn Levon msgp->size = size; 5918e899e6SJohn Levon trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags, 6018e899e6SJohn Levon container->proxy->async_ops); 6118e899e6SJohn Levon 6218e899e6SJohn Levon if (container->proxy->async_ops) { 6318e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL, 6418e899e6SJohn Levon 0, &local_err)) { 6518e899e6SJohn Levon error_report_err(local_err); 6618e899e6SJohn Levon ret = -EFAULT; 6718e899e6SJohn Levon } 6818e899e6SJohn Levon } else { 6918e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, 7018e899e6SJohn Levon 0, &local_err)) { 7118e899e6SJohn Levon error_report_err(local_err); 7218e899e6SJohn Levon ret = -EFAULT; 7318e899e6SJohn Levon } 7418e899e6SJohn Levon 7518e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 7618e899e6SJohn Levon ret = -msgp->hdr.error_reply; 7718e899e6SJohn Levon } 7818e899e6SJohn Levon 7918e899e6SJohn Levon g_free(msgp); 8018e899e6SJohn Levon } 8118e899e6SJohn Levon 8218e899e6SJohn Levon return ret; 839fca2b7dSJohn Levon } 849fca2b7dSJohn Levon 859fca2b7dSJohn Levon static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, 869fca2b7dSJohn Levon ram_addr_t size, void *vaddr, bool readonly, 879fca2b7dSJohn Levon MemoryRegion *mrp) 889fca2b7dSJohn Levon { 8918e899e6SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 9018e899e6SJohn Levon bcontainer); 9118e899e6SJohn Levon int fd = memory_region_get_fd(mrp); 9218e899e6SJohn Levon Error *local_err = NULL; 93*622740aaSJohn Levon int ret = 0; 9418e899e6SJohn Levon 9518e899e6SJohn Levon VFIOUserFDs *fds = NULL; 9618e899e6SJohn Levon VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); 9718e899e6SJohn Levon 9818e899e6SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0); 9918e899e6SJohn Levon msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map); 10018e899e6SJohn Levon msgp->flags = VFIO_DMA_MAP_FLAG_READ; 10118e899e6SJohn Levon msgp->offset = 0; 10218e899e6SJohn Levon msgp->iova = iova; 10318e899e6SJohn Levon msgp->size = size; 10418e899e6SJohn Levon 10518e899e6SJohn Levon /* 10618e899e6SJohn Levon * vaddr enters as a QEMU process address; make it either a file offset 10718e899e6SJohn Levon * for mapped areas or leave as 0. 10818e899e6SJohn Levon */ 10918e899e6SJohn Levon if (fd != -1) { 11018e899e6SJohn Levon msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr); 11118e899e6SJohn Levon } 11218e899e6SJohn Levon 11318e899e6SJohn Levon if (!readonly) { 11418e899e6SJohn Levon msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE; 11518e899e6SJohn Levon } 11618e899e6SJohn Levon 11718e899e6SJohn Levon trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags, 11818e899e6SJohn Levon container->proxy->async_ops); 11918e899e6SJohn Levon 12018e899e6SJohn Levon /* 12118e899e6SJohn Levon * The async_ops case sends without blocking. They're later waited for in 12218e899e6SJohn Levon * vfio_send_wait_reqs. 12318e899e6SJohn Levon */ 12418e899e6SJohn Levon if (container->proxy->async_ops) { 12518e899e6SJohn Levon /* can't use auto variable since we don't block */ 12618e899e6SJohn Levon if (fd != -1) { 12718e899e6SJohn Levon fds = vfio_user_getfds(1); 12818e899e6SJohn Levon fds->send_fds = 1; 12918e899e6SJohn Levon fds->fds[0] = fd; 13018e899e6SJohn Levon } 13118e899e6SJohn Levon 13218e899e6SJohn Levon if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds, 13318e899e6SJohn Levon 0, &local_err)) { 13418e899e6SJohn Levon error_report_err(local_err); 13518e899e6SJohn Levon ret = -EFAULT; 13618e899e6SJohn Levon } 13718e899e6SJohn Levon } else { 13818e899e6SJohn Levon VFIOUserFDs local_fds = { 1, 0, &fd }; 13918e899e6SJohn Levon 14018e899e6SJohn Levon fds = fd != -1 ? &local_fds : NULL; 14118e899e6SJohn Levon 14218e899e6SJohn Levon if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds, 14318e899e6SJohn Levon 0, &local_err)) { 14418e899e6SJohn Levon error_report_err(local_err); 14518e899e6SJohn Levon ret = -EFAULT; 14618e899e6SJohn Levon } 14718e899e6SJohn Levon 14818e899e6SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 14918e899e6SJohn Levon ret = -msgp->hdr.error_reply; 15018e899e6SJohn Levon } 15118e899e6SJohn Levon 15218e899e6SJohn Levon g_free(msgp); 15318e899e6SJohn Levon } 15418e899e6SJohn Levon 15518e899e6SJohn Levon return ret; 1569fca2b7dSJohn Levon } 1579fca2b7dSJohn Levon 1589fca2b7dSJohn Levon static int 1599fca2b7dSJohn Levon vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, 1609fca2b7dSJohn Levon bool start, Error **errp) 1619fca2b7dSJohn Levon { 1629fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1639fca2b7dSJohn Levon return -ENOTSUP; 1649fca2b7dSJohn Levon } 1659fca2b7dSJohn Levon 1669fca2b7dSJohn Levon static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, 1679fca2b7dSJohn Levon VFIOBitmap *vbmap, hwaddr iova, 1689fca2b7dSJohn Levon hwaddr size, Error **errp) 1699fca2b7dSJohn Levon { 1709fca2b7dSJohn Levon error_setg_errno(errp, ENOTSUP, "Not supported"); 1719fca2b7dSJohn Levon return -ENOTSUP; 1729fca2b7dSJohn Levon } 1739fca2b7dSJohn Levon 1749fca2b7dSJohn Levon static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) 1759fca2b7dSJohn Levon { 17652ce9c35SJohn Levon VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, 17752ce9c35SJohn Levon bcontainer); 17852ce9c35SJohn Levon 17952ce9c35SJohn Levon assert(container->proxy->dma_pgsizes != 0); 18052ce9c35SJohn Levon bcontainer->pgsizes = container->proxy->dma_pgsizes; 18152ce9c35SJohn Levon bcontainer->dma_max_mappings = container->proxy->max_dma; 18252ce9c35SJohn Levon 18352ce9c35SJohn Levon /* No live migration support yet. */ 18452ce9c35SJohn Levon bcontainer->dirty_pages_supported = false; 18552ce9c35SJohn Levon bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap; 18652ce9c35SJohn Levon bcontainer->dirty_pgsizes = container->proxy->migr_pgsize; 18752ce9c35SJohn Levon 18852ce9c35SJohn Levon return true; 1899fca2b7dSJohn Levon } 1909fca2b7dSJohn Levon 19152ce9c35SJohn Levon static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev, 19252ce9c35SJohn Levon Error **errp) 1939fca2b7dSJohn Levon { 1949fca2b7dSJohn Levon VFIOUserContainer *container; 1959fca2b7dSJohn Levon 1969fca2b7dSJohn Levon container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); 19752ce9c35SJohn Levon container->proxy = vbasedev->proxy; 1989fca2b7dSJohn Levon return container; 1999fca2b7dSJohn Levon } 2009fca2b7dSJohn Levon 2019fca2b7dSJohn Levon /* 2029fca2b7dSJohn Levon * Try to mirror vfio_container_connect() as much as possible. 2039fca2b7dSJohn Levon */ 2049fca2b7dSJohn Levon static VFIOUserContainer * 20552ce9c35SJohn Levon vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, 20652ce9c35SJohn Levon Error **errp) 2079fca2b7dSJohn Levon { 2089fca2b7dSJohn Levon VFIOContainerBase *bcontainer; 2099fca2b7dSJohn Levon VFIOUserContainer *container; 2109fca2b7dSJohn Levon VFIOAddressSpace *space; 2119fca2b7dSJohn Levon VFIOIOMMUClass *vioc; 21252ce9c35SJohn Levon int ret; 2139fca2b7dSJohn Levon 2149fca2b7dSJohn Levon space = vfio_address_space_get(as); 2159fca2b7dSJohn Levon 21652ce9c35SJohn Levon container = vfio_user_create_container(vbasedev, errp); 2179fca2b7dSJohn Levon if (!container) { 2189fca2b7dSJohn Levon goto put_space_exit; 2199fca2b7dSJohn Levon } 2209fca2b7dSJohn Levon 2219fca2b7dSJohn Levon bcontainer = &container->bcontainer; 2229fca2b7dSJohn Levon 22352ce9c35SJohn Levon ret = ram_block_uncoordinated_discard_disable(true); 22452ce9c35SJohn Levon if (ret) { 22552ce9c35SJohn Levon error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); 226f7c5dff2SMark Cave-Ayland goto free_container_exit; 22752ce9c35SJohn Levon } 22852ce9c35SJohn Levon 2299fca2b7dSJohn Levon vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 2309fca2b7dSJohn Levon assert(vioc->setup); 2319fca2b7dSJohn Levon 2329fca2b7dSJohn Levon if (!vioc->setup(bcontainer, errp)) { 23352ce9c35SJohn Levon goto enable_discards_exit; 2349fca2b7dSJohn Levon } 2359fca2b7dSJohn Levon 2369fca2b7dSJohn Levon vfio_address_space_insert(space, bcontainer); 2379fca2b7dSJohn Levon 2389fca2b7dSJohn Levon if (!vfio_listener_register(bcontainer, errp)) { 2399fca2b7dSJohn Levon goto listener_release_exit; 2409fca2b7dSJohn Levon } 2419fca2b7dSJohn Levon 2429fca2b7dSJohn Levon bcontainer->initialized = true; 2439fca2b7dSJohn Levon 2449fca2b7dSJohn Levon return container; 2459fca2b7dSJohn Levon 2469fca2b7dSJohn Levon listener_release_exit: 2479fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2489fca2b7dSJohn Levon if (vioc->release) { 2499fca2b7dSJohn Levon vioc->release(bcontainer); 2509fca2b7dSJohn Levon } 2519fca2b7dSJohn Levon 25252ce9c35SJohn Levon enable_discards_exit: 25352ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 25452ce9c35SJohn Levon 2559fca2b7dSJohn Levon free_container_exit: 2569fca2b7dSJohn Levon object_unref(container); 2579fca2b7dSJohn Levon 2589fca2b7dSJohn Levon put_space_exit: 2599fca2b7dSJohn Levon vfio_address_space_put(space); 2609fca2b7dSJohn Levon 2619fca2b7dSJohn Levon return NULL; 2629fca2b7dSJohn Levon } 2639fca2b7dSJohn Levon 2649fca2b7dSJohn Levon static void vfio_user_container_disconnect(VFIOUserContainer *container) 2659fca2b7dSJohn Levon { 2669fca2b7dSJohn Levon VFIOContainerBase *bcontainer = &container->bcontainer; 2679fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); 26852ce9c35SJohn Levon VFIOAddressSpace *space = bcontainer->space; 26952ce9c35SJohn Levon 27052ce9c35SJohn Levon ram_block_uncoordinated_discard_disable(false); 2719fca2b7dSJohn Levon 2729fca2b7dSJohn Levon vfio_listener_unregister(bcontainer); 2739fca2b7dSJohn Levon if (vioc->release) { 2749fca2b7dSJohn Levon vioc->release(bcontainer); 2759fca2b7dSJohn Levon } 2769fca2b7dSJohn Levon 2779fca2b7dSJohn Levon object_unref(container); 2789fca2b7dSJohn Levon 2799fca2b7dSJohn Levon vfio_address_space_put(space); 2809fca2b7dSJohn Levon } 2819fca2b7dSJohn Levon 2829fca2b7dSJohn Levon static bool vfio_user_device_get(VFIOUserContainer *container, 2839fca2b7dSJohn Levon VFIODevice *vbasedev, Error **errp) 2849fca2b7dSJohn Levon { 2853bdb738bSJohn Levon struct vfio_device_info info = { .argsz = sizeof(info) }; 2863bdb738bSJohn Levon 2873bdb738bSJohn Levon 2883bdb738bSJohn Levon if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) { 2893bdb738bSJohn Levon return false; 2903bdb738bSJohn Levon } 2919fca2b7dSJohn Levon 2929fca2b7dSJohn Levon vbasedev->fd = -1; 2939fca2b7dSJohn Levon 2949fca2b7dSJohn Levon vfio_device_prepare(vbasedev, &container->bcontainer, &info); 2959fca2b7dSJohn Levon 2969fca2b7dSJohn Levon return true; 2979fca2b7dSJohn Levon } 2989fca2b7dSJohn Levon 2999fca2b7dSJohn Levon /* 3009fca2b7dSJohn Levon * vfio_user_device_attach: attach a device to a new container. 3019fca2b7dSJohn Levon */ 3029fca2b7dSJohn Levon static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, 3039fca2b7dSJohn Levon AddressSpace *as, Error **errp) 3049fca2b7dSJohn Levon { 3059fca2b7dSJohn Levon VFIOUserContainer *container; 3069fca2b7dSJohn Levon 30752ce9c35SJohn Levon container = vfio_user_container_connect(as, vbasedev, errp); 3089fca2b7dSJohn Levon if (container == NULL) { 3099fca2b7dSJohn Levon error_prepend(errp, "failed to connect proxy"); 3109fca2b7dSJohn Levon return false; 3119fca2b7dSJohn Levon } 3129fca2b7dSJohn Levon 3139fca2b7dSJohn Levon return vfio_user_device_get(container, vbasedev, errp); 3149fca2b7dSJohn Levon } 3159fca2b7dSJohn Levon 3169fca2b7dSJohn Levon static void vfio_user_device_detach(VFIODevice *vbasedev) 3179fca2b7dSJohn Levon { 3189fca2b7dSJohn Levon VFIOUserContainer *container = container_of(vbasedev->bcontainer, 3199fca2b7dSJohn Levon VFIOUserContainer, bcontainer); 3209fca2b7dSJohn Levon 3219fca2b7dSJohn Levon vfio_device_unprepare(vbasedev); 3229fca2b7dSJohn Levon 3239fca2b7dSJohn Levon vfio_user_container_disconnect(container); 3249fca2b7dSJohn Levon } 3259fca2b7dSJohn Levon 3269fca2b7dSJohn Levon static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single) 3279fca2b7dSJohn Levon { 3289fca2b7dSJohn Levon /* ->needs_reset is always false for vfio-user. */ 3299fca2b7dSJohn Levon return 0; 3309fca2b7dSJohn Levon } 3319fca2b7dSJohn Levon 3329fca2b7dSJohn Levon static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) 3339fca2b7dSJohn Levon { 3349fca2b7dSJohn Levon VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); 3359fca2b7dSJohn Levon 3369fca2b7dSJohn Levon vioc->setup = vfio_user_setup; 33718e899e6SJohn Levon vioc->listener_begin = vfio_user_listener_begin, 33818e899e6SJohn Levon vioc->listener_commit = vfio_user_listener_commit, 3399fca2b7dSJohn Levon vioc->dma_map = vfio_user_dma_map; 3409fca2b7dSJohn Levon vioc->dma_unmap = vfio_user_dma_unmap; 3419fca2b7dSJohn Levon vioc->attach_device = vfio_user_device_attach; 3429fca2b7dSJohn Levon vioc->detach_device = vfio_user_device_detach; 3439fca2b7dSJohn Levon vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking; 3449fca2b7dSJohn Levon vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap; 3459fca2b7dSJohn Levon vioc->pci_hot_reset = vfio_user_pci_hot_reset; 3469fca2b7dSJohn Levon }; 3479fca2b7dSJohn Levon 3489fca2b7dSJohn Levon static const TypeInfo types[] = { 3499fca2b7dSJohn Levon { 3509fca2b7dSJohn Levon .name = TYPE_VFIO_IOMMU_USER, 3519fca2b7dSJohn Levon .parent = TYPE_VFIO_IOMMU, 3529fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserContainer), 3539fca2b7dSJohn Levon .class_init = vfio_iommu_user_class_init, 3549fca2b7dSJohn Levon }, 3559fca2b7dSJohn Levon }; 3569fca2b7dSJohn Levon 3579fca2b7dSJohn Levon DEFINE_TYPES(types) 358