16e6d8ac6SEric Auger /* 26e6d8ac6SEric Auger * iommufd container backend 36e6d8ac6SEric Auger * 46e6d8ac6SEric Auger * Copyright (C) 2023 Intel Corporation. 56e6d8ac6SEric Auger * Copyright Red Hat, Inc. 2023 66e6d8ac6SEric Auger * 76e6d8ac6SEric Auger * Authors: Yi Liu <yi.l.liu@intel.com> 86e6d8ac6SEric Auger * Eric Auger <eric.auger@redhat.com> 96e6d8ac6SEric Auger * 106e6d8ac6SEric Auger * SPDX-License-Identifier: GPL-2.0-or-later 116e6d8ac6SEric Auger */ 126e6d8ac6SEric Auger 136e6d8ac6SEric Auger #include "qemu/osdep.h" 146e6d8ac6SEric Auger #include "sysemu/iommufd.h" 156e6d8ac6SEric Auger #include "qapi/error.h" 166e6d8ac6SEric Auger #include "qemu/module.h" 176e6d8ac6SEric Auger #include "qom/object_interfaces.h" 186e6d8ac6SEric Auger #include "qemu/error-report.h" 196e6d8ac6SEric Auger #include "monitor/monitor.h" 206e6d8ac6SEric Auger #include "trace.h" 216c635326SJoao Martins #include "hw/vfio/vfio-common.h" 226e6d8ac6SEric Auger #include <sys/ioctl.h> 236e6d8ac6SEric Auger #include <linux/iommufd.h> 246e6d8ac6SEric Auger 256e6d8ac6SEric Auger static void iommufd_backend_init(Object *obj) 266e6d8ac6SEric Auger { 276e6d8ac6SEric Auger IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 286e6d8ac6SEric Auger 296e6d8ac6SEric Auger be->fd = -1; 306e6d8ac6SEric Auger be->users = 0; 316e6d8ac6SEric Auger be->owned = true; 326e6d8ac6SEric Auger } 336e6d8ac6SEric Auger 346e6d8ac6SEric Auger static void iommufd_backend_finalize(Object *obj) 356e6d8ac6SEric Auger { 366e6d8ac6SEric Auger IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 376e6d8ac6SEric Auger 386e6d8ac6SEric Auger if (be->owned) { 396e6d8ac6SEric Auger close(be->fd); 406e6d8ac6SEric Auger be->fd = -1; 416e6d8ac6SEric Auger } 426e6d8ac6SEric Auger } 436e6d8ac6SEric Auger 446e6d8ac6SEric Auger static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) 456e6d8ac6SEric Auger { 46c1cccad8SZhao Liu ERRP_GUARD(); 476e6d8ac6SEric Auger IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 486e6d8ac6SEric Auger int fd = -1; 496e6d8ac6SEric Auger 506e6d8ac6SEric Auger fd = monitor_fd_param(monitor_cur(), str, errp); 516e6d8ac6SEric Auger if (fd == -1) { 526e6d8ac6SEric Auger error_prepend(errp, "Could not parse remote object fd %s:", str); 536e6d8ac6SEric Auger return; 546e6d8ac6SEric Auger } 556e6d8ac6SEric Auger be->fd = fd; 566e6d8ac6SEric Auger be->owned = false; 576e6d8ac6SEric Auger trace_iommu_backend_set_fd(be->fd); 586e6d8ac6SEric Auger } 596e6d8ac6SEric Auger 606e6d8ac6SEric Auger static bool iommufd_backend_can_be_deleted(UserCreatable *uc) 616e6d8ac6SEric Auger { 626e6d8ac6SEric Auger IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); 636e6d8ac6SEric Auger 646e6d8ac6SEric Auger return !be->users; 656e6d8ac6SEric Auger } 666e6d8ac6SEric Auger 676e6d8ac6SEric Auger static void iommufd_backend_class_init(ObjectClass *oc, void *data) 686e6d8ac6SEric Auger { 696e6d8ac6SEric Auger UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 706e6d8ac6SEric Auger 716e6d8ac6SEric Auger ucc->can_be_deleted = iommufd_backend_can_be_deleted; 726e6d8ac6SEric Auger 736e6d8ac6SEric Auger object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); 746e6d8ac6SEric Auger } 756e6d8ac6SEric Auger 769067d50dSZhenzhong Duan bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) 776e6d8ac6SEric Auger { 789067d50dSZhenzhong Duan int fd; 796e6d8ac6SEric Auger 806e6d8ac6SEric Auger if (be->owned && !be->users) { 8147cd2f1aSZhao Liu fd = qemu_open("/dev/iommu", O_RDWR, errp); 826e6d8ac6SEric Auger if (fd < 0) { 839067d50dSZhenzhong Duan return false; 846e6d8ac6SEric Auger } 856e6d8ac6SEric Auger be->fd = fd; 866e6d8ac6SEric Auger } 876e6d8ac6SEric Auger be->users++; 889067d50dSZhenzhong Duan 899067d50dSZhenzhong Duan trace_iommufd_backend_connect(be->fd, be->owned, be->users); 909067d50dSZhenzhong Duan return true; 916e6d8ac6SEric Auger } 926e6d8ac6SEric Auger 936e6d8ac6SEric Auger void iommufd_backend_disconnect(IOMMUFDBackend *be) 946e6d8ac6SEric Auger { 956e6d8ac6SEric Auger if (!be->users) { 966e6d8ac6SEric Auger goto out; 976e6d8ac6SEric Auger } 986e6d8ac6SEric Auger be->users--; 996e6d8ac6SEric Auger if (!be->users && be->owned) { 1006e6d8ac6SEric Auger close(be->fd); 1016e6d8ac6SEric Auger be->fd = -1; 1026e6d8ac6SEric Auger } 1036e6d8ac6SEric Auger out: 1046e6d8ac6SEric Auger trace_iommufd_backend_disconnect(be->fd, be->users); 1056e6d8ac6SEric Auger } 1066e6d8ac6SEric Auger 1079067d50dSZhenzhong Duan bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, 1086e6d8ac6SEric Auger Error **errp) 1096e6d8ac6SEric Auger { 1109067d50dSZhenzhong Duan int fd = be->fd; 1116e6d8ac6SEric Auger struct iommu_ioas_alloc alloc_data = { 1126e6d8ac6SEric Auger .size = sizeof(alloc_data), 1136e6d8ac6SEric Auger .flags = 0, 1146e6d8ac6SEric Auger }; 1156e6d8ac6SEric Auger 1169067d50dSZhenzhong Duan if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) { 1176e6d8ac6SEric Auger error_setg_errno(errp, errno, "Failed to allocate ioas"); 1189067d50dSZhenzhong Duan return false; 1196e6d8ac6SEric Auger } 1206e6d8ac6SEric Auger 1216e6d8ac6SEric Auger *ioas_id = alloc_data.out_ioas_id; 1229067d50dSZhenzhong Duan trace_iommufd_backend_alloc_ioas(fd, *ioas_id); 1236e6d8ac6SEric Auger 1249067d50dSZhenzhong Duan return true; 1256e6d8ac6SEric Auger } 1266e6d8ac6SEric Auger 1276e6d8ac6SEric Auger void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) 1286e6d8ac6SEric Auger { 1296e6d8ac6SEric Auger int ret, fd = be->fd; 1306e6d8ac6SEric Auger struct iommu_destroy des = { 1316e6d8ac6SEric Auger .size = sizeof(des), 1326e6d8ac6SEric Auger .id = id, 1336e6d8ac6SEric Auger }; 1346e6d8ac6SEric Auger 1356e6d8ac6SEric Auger ret = ioctl(fd, IOMMU_DESTROY, &des); 1366e6d8ac6SEric Auger trace_iommufd_backend_free_id(fd, id, ret); 1376e6d8ac6SEric Auger if (ret) { 1386e6d8ac6SEric Auger error_report("Failed to free id: %u %m", id); 1396e6d8ac6SEric Auger } 1406e6d8ac6SEric Auger } 1416e6d8ac6SEric Auger 1426e6d8ac6SEric Auger int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, 1436e6d8ac6SEric Auger ram_addr_t size, void *vaddr, bool readonly) 1446e6d8ac6SEric Auger { 1456e6d8ac6SEric Auger int ret, fd = be->fd; 1466e6d8ac6SEric Auger struct iommu_ioas_map map = { 1476e6d8ac6SEric Auger .size = sizeof(map), 1486e6d8ac6SEric Auger .flags = IOMMU_IOAS_MAP_READABLE | 1496e6d8ac6SEric Auger IOMMU_IOAS_MAP_FIXED_IOVA, 1506e6d8ac6SEric Auger .ioas_id = ioas_id, 1516e6d8ac6SEric Auger .__reserved = 0, 1526e6d8ac6SEric Auger .user_va = (uintptr_t)vaddr, 1536e6d8ac6SEric Auger .iova = iova, 1546e6d8ac6SEric Auger .length = size, 1556e6d8ac6SEric Auger }; 1566e6d8ac6SEric Auger 1576e6d8ac6SEric Auger if (!readonly) { 1586e6d8ac6SEric Auger map.flags |= IOMMU_IOAS_MAP_WRITEABLE; 1596e6d8ac6SEric Auger } 1606e6d8ac6SEric Auger 1616e6d8ac6SEric Auger ret = ioctl(fd, IOMMU_IOAS_MAP, &map); 1626e6d8ac6SEric Auger trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, 1636e6d8ac6SEric Auger vaddr, readonly, ret); 1646e6d8ac6SEric Auger if (ret) { 1656e6d8ac6SEric Auger ret = -errno; 1666e6d8ac6SEric Auger 1676e6d8ac6SEric Auger /* TODO: Not support mapping hardware PCI BAR region for now. */ 1686e6d8ac6SEric Auger if (errno == EFAULT) { 1696e6d8ac6SEric Auger warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); 1706e6d8ac6SEric Auger } else { 1716e6d8ac6SEric Auger error_report("IOMMU_IOAS_MAP failed: %m"); 1726e6d8ac6SEric Auger } 1736e6d8ac6SEric Auger } 1746e6d8ac6SEric Auger return ret; 1756e6d8ac6SEric Auger } 1766e6d8ac6SEric Auger 1776e6d8ac6SEric Auger int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, 1786e6d8ac6SEric Auger hwaddr iova, ram_addr_t size) 1796e6d8ac6SEric Auger { 1806e6d8ac6SEric Auger int ret, fd = be->fd; 1816e6d8ac6SEric Auger struct iommu_ioas_unmap unmap = { 1826e6d8ac6SEric Auger .size = sizeof(unmap), 1836e6d8ac6SEric Auger .ioas_id = ioas_id, 1846e6d8ac6SEric Auger .iova = iova, 1856e6d8ac6SEric Auger .length = size, 1866e6d8ac6SEric Auger }; 1876e6d8ac6SEric Auger 1886e6d8ac6SEric Auger ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); 1896e6d8ac6SEric Auger /* 1906e6d8ac6SEric Auger * IOMMUFD takes mapping as some kind of object, unmapping 1916e6d8ac6SEric Auger * nonexistent mapping is treated as deleting a nonexistent 1926e6d8ac6SEric Auger * object and return ENOENT. This is different from legacy 1936e6d8ac6SEric Auger * backend which allows it. vIOMMU may trigger a lot of 1946e6d8ac6SEric Auger * redundant unmapping, to avoid flush the log, treat them 1956e6d8ac6SEric Auger * as succeess for IOMMUFD just like legacy backend. 1966e6d8ac6SEric Auger */ 1976e6d8ac6SEric Auger if (ret && errno == ENOENT) { 1986e6d8ac6SEric Auger trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); 1996e6d8ac6SEric Auger ret = 0; 2006e6d8ac6SEric Auger } else { 2016e6d8ac6SEric Auger trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); 2026e6d8ac6SEric Auger } 2036e6d8ac6SEric Auger 2046e6d8ac6SEric Auger if (ret) { 2056e6d8ac6SEric Auger ret = -errno; 2066e6d8ac6SEric Auger error_report("IOMMU_IOAS_UNMAP failed: %m"); 2076e6d8ac6SEric Auger } 2086e6d8ac6SEric Auger return ret; 2096e6d8ac6SEric Auger } 2106e6d8ac6SEric Auger 2115b1e96e6SJoao Martins bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, 2125b1e96e6SJoao Martins uint32_t pt_id, uint32_t flags, 2135b1e96e6SJoao Martins uint32_t data_type, uint32_t data_len, 2145b1e96e6SJoao Martins void *data_ptr, uint32_t *out_hwpt, 2155b1e96e6SJoao Martins Error **errp) 2165b1e96e6SJoao Martins { 2175b1e96e6SJoao Martins int ret, fd = be->fd; 2185b1e96e6SJoao Martins struct iommu_hwpt_alloc alloc_hwpt = { 2195b1e96e6SJoao Martins .size = sizeof(struct iommu_hwpt_alloc), 2205b1e96e6SJoao Martins .flags = flags, 2215b1e96e6SJoao Martins .dev_id = dev_id, 2225b1e96e6SJoao Martins .pt_id = pt_id, 2235b1e96e6SJoao Martins .data_type = data_type, 2245b1e96e6SJoao Martins .data_len = data_len, 2255b1e96e6SJoao Martins .data_uptr = (uintptr_t)data_ptr, 2265b1e96e6SJoao Martins }; 2275b1e96e6SJoao Martins 2285b1e96e6SJoao Martins ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt); 2295b1e96e6SJoao Martins trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type, 2305b1e96e6SJoao Martins data_len, (uintptr_t)data_ptr, 2315b1e96e6SJoao Martins alloc_hwpt.out_hwpt_id, ret); 2325b1e96e6SJoao Martins if (ret) { 2335b1e96e6SJoao Martins error_setg_errno(errp, errno, "Failed to allocate hwpt"); 2345b1e96e6SJoao Martins return false; 2355b1e96e6SJoao Martins } 2365b1e96e6SJoao Martins 2375b1e96e6SJoao Martins *out_hwpt = alloc_hwpt.out_hwpt_id; 2385b1e96e6SJoao Martins return true; 2395b1e96e6SJoao Martins } 2405b1e96e6SJoao Martins 241*52ce8822SJoao Martins bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, 242*52ce8822SJoao Martins uint32_t hwpt_id, bool start, 243*52ce8822SJoao Martins Error **errp) 244*52ce8822SJoao Martins { 245*52ce8822SJoao Martins int ret; 246*52ce8822SJoao Martins struct iommu_hwpt_set_dirty_tracking set_dirty = { 247*52ce8822SJoao Martins .size = sizeof(set_dirty), 248*52ce8822SJoao Martins .hwpt_id = hwpt_id, 249*52ce8822SJoao Martins .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0, 250*52ce8822SJoao Martins }; 251*52ce8822SJoao Martins 252*52ce8822SJoao Martins ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty); 253*52ce8822SJoao Martins trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0); 254*52ce8822SJoao Martins if (ret) { 255*52ce8822SJoao Martins error_setg_errno(errp, errno, 256*52ce8822SJoao Martins "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed", 257*52ce8822SJoao Martins hwpt_id); 258*52ce8822SJoao Martins return false; 259*52ce8822SJoao Martins } 260*52ce8822SJoao Martins 261*52ce8822SJoao Martins return true; 262*52ce8822SJoao Martins } 263*52ce8822SJoao Martins 26442965386SZhenzhong Duan bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, 26542965386SZhenzhong Duan uint32_t *type, void *data, uint32_t len, 2662d1bf258SJoao Martins uint64_t *caps, Error **errp) 26742965386SZhenzhong Duan { 26842965386SZhenzhong Duan struct iommu_hw_info info = { 26942965386SZhenzhong Duan .size = sizeof(info), 27042965386SZhenzhong Duan .dev_id = devid, 27142965386SZhenzhong Duan .data_len = len, 27242965386SZhenzhong Duan .data_uptr = (uintptr_t)data, 27342965386SZhenzhong Duan }; 27442965386SZhenzhong Duan 27542965386SZhenzhong Duan if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { 27642965386SZhenzhong Duan error_setg_errno(errp, errno, "Failed to get hardware info"); 27742965386SZhenzhong Duan return false; 27842965386SZhenzhong Duan } 27942965386SZhenzhong Duan 28042965386SZhenzhong Duan g_assert(type); 28142965386SZhenzhong Duan *type = info.out_data_type; 2822d1bf258SJoao Martins g_assert(caps); 2832d1bf258SJoao Martins *caps = info.out_capabilities; 28442965386SZhenzhong Duan 28542965386SZhenzhong Duan return true; 28642965386SZhenzhong Duan } 28742965386SZhenzhong Duan 28863c6e83eSZhenzhong Duan static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) 28963c6e83eSZhenzhong Duan { 29063c6e83eSZhenzhong Duan HostIOMMUDeviceCaps *caps = &hiod->caps; 29163c6e83eSZhenzhong Duan 29263c6e83eSZhenzhong Duan switch (cap) { 29363c6e83eSZhenzhong Duan case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: 29463c6e83eSZhenzhong Duan return caps->type; 29563c6e83eSZhenzhong Duan case HOST_IOMMU_DEVICE_CAP_AW_BITS: 2966c635326SJoao Martins return vfio_device_get_aw_bits(hiod->agent); 29763c6e83eSZhenzhong Duan default: 29863c6e83eSZhenzhong Duan error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); 29963c6e83eSZhenzhong Duan return -EINVAL; 30063c6e83eSZhenzhong Duan } 30163c6e83eSZhenzhong Duan } 30263c6e83eSZhenzhong Duan 30363c6e83eSZhenzhong Duan static void hiod_iommufd_class_init(ObjectClass *oc, void *data) 30463c6e83eSZhenzhong Duan { 30563c6e83eSZhenzhong Duan HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); 30663c6e83eSZhenzhong Duan 30763c6e83eSZhenzhong Duan hioc->get_cap = hiod_iommufd_get_cap; 30863c6e83eSZhenzhong Duan }; 30963c6e83eSZhenzhong Duan 3109005f928SZhenzhong Duan static const TypeInfo types[] = { 3119005f928SZhenzhong Duan { 3126e6d8ac6SEric Auger .name = TYPE_IOMMUFD_BACKEND, 3136e6d8ac6SEric Auger .parent = TYPE_OBJECT, 3146e6d8ac6SEric Auger .instance_size = sizeof(IOMMUFDBackend), 3156e6d8ac6SEric Auger .instance_init = iommufd_backend_init, 3166e6d8ac6SEric Auger .instance_finalize = iommufd_backend_finalize, 3176e6d8ac6SEric Auger .class_size = sizeof(IOMMUFDBackendClass), 3186e6d8ac6SEric Auger .class_init = iommufd_backend_class_init, 3196e6d8ac6SEric Auger .interfaces = (InterfaceInfo[]) { 3206e6d8ac6SEric Auger { TYPE_USER_CREATABLE }, 3216e6d8ac6SEric Auger { } 3226e6d8ac6SEric Auger } 3239005f928SZhenzhong Duan }, { 3249005f928SZhenzhong Duan .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, 3259005f928SZhenzhong Duan .parent = TYPE_HOST_IOMMU_DEVICE, 32663c6e83eSZhenzhong Duan .class_init = hiod_iommufd_class_init, 3279005f928SZhenzhong Duan .abstract = true, 3289005f928SZhenzhong Duan } 3296e6d8ac6SEric Auger }; 3306e6d8ac6SEric Auger 3319005f928SZhenzhong Duan DEFINE_TYPES(types) 332