1 /* 2 * iommufd container backend 3 * 4 * Copyright (C) 2023 Intel Corporation. 5 * Copyright Red Hat, Inc. 2023 6 * 7 * Authors: Yi Liu <yi.l.liu@intel.com> 8 * Eric Auger <eric.auger@redhat.com> 9 * 10 * SPDX-License-Identifier: GPL-2.0-or-later 11 */ 12 13 #include "qemu/osdep.h" 14 #include "system/iommufd.h" 15 #include "qapi/error.h" 16 #include "qemu/module.h" 17 #include "qom/object_interfaces.h" 18 #include "qemu/error-report.h" 19 #include "migration/cpr.h" 20 #include "monitor/monitor.h" 21 #include "trace.h" 22 #include "hw/vfio/vfio-device.h" 23 #include <sys/ioctl.h> 24 #include <linux/iommufd.h> 25 26 static const char *iommufd_fd_name(IOMMUFDBackend *be) 27 { 28 return object_get_canonical_path_component(OBJECT(be)); 29 } 30 31 static void iommufd_backend_init(Object *obj) 32 { 33 IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 34 35 be->fd = -1; 36 be->users = 0; 37 be->owned = true; 38 } 39 40 static void iommufd_backend_finalize(Object *obj) 41 { 42 IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 43 44 if (be->owned) { 45 close(be->fd); 46 be->fd = -1; 47 } 48 } 49 50 static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) 51 { 52 ERRP_GUARD(); 53 IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); 54 int fd = -1; 55 56 fd = monitor_fd_param(monitor_cur(), str, errp); 57 if (fd == -1) { 58 error_prepend(errp, "Could not parse remote object fd %s:", str); 59 return; 60 } 61 be->fd = fd; 62 be->owned = false; 63 trace_iommu_backend_set_fd(be->fd); 64 } 65 66 static bool iommufd_backend_can_be_deleted(UserCreatable *uc) 67 { 68 IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); 69 70 return !be->users; 71 } 72 73 static void iommufd_backend_complete(UserCreatable *uc, Error **errp) 74 { 75 IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); 76 const char *name = iommufd_fd_name(be); 77 78 if (!be->owned) { 79 /* fd came from the command line. Fetch updated value from cpr state. */ 80 if (cpr_is_incoming()) { 81 be->fd = cpr_find_fd(name, 0); 82 } else { 83 cpr_save_fd(name, 0, be->fd); 84 } 85 } 86 } 87 88 static void iommufd_backend_class_init(ObjectClass *oc, const void *data) 89 { 90 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 91 92 ucc->can_be_deleted = iommufd_backend_can_be_deleted; 93 ucc->complete = iommufd_backend_complete; 94 95 object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); 96 } 97 98 bool iommufd_change_process_capable(IOMMUFDBackend *be) 99 { 100 struct iommu_ioas_change_process args = {.size = sizeof(args)}; 101 102 /* 103 * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl. 104 * This is a no-op if the process has not changed since DMA was mapped. 105 */ 106 return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); 107 } 108 109 bool iommufd_change_process(IOMMUFDBackend *be, Error **errp) 110 { 111 struct iommu_ioas_change_process args = {.size = sizeof(args)}; 112 bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); 113 114 if (!ret) { 115 error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed", 116 be->fd); 117 } 118 trace_iommufd_change_process(be->fd, ret); 119 return ret; 120 } 121 122 bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) 123 { 124 int fd; 125 126 if (be->owned && !be->users) { 127 fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp); 128 if (fd < 0) { 129 return false; 130 } 131 be->fd = fd; 132 } 133 if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) { 134 if (be->owned) { 135 close(be->fd); 136 be->fd = -1; 137 } 138 return false; 139 } 140 be->users++; 141 142 trace_iommufd_backend_connect(be->fd, be->owned, be->users); 143 return true; 144 } 145 146 void iommufd_backend_disconnect(IOMMUFDBackend *be) 147 { 148 if (!be->users) { 149 goto out; 150 } 151 be->users--; 152 if (!be->users) { 153 vfio_iommufd_cpr_unregister_iommufd(be); 154 if (be->owned) { 155 cpr_delete_fd(iommufd_fd_name(be), 0); 156 close(be->fd); 157 be->fd = -1; 158 } 159 } 160 out: 161 trace_iommufd_backend_disconnect(be->fd, be->users); 162 } 163 164 bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, 165 Error **errp) 166 { 167 int fd = be->fd; 168 struct iommu_ioas_alloc alloc_data = { 169 .size = sizeof(alloc_data), 170 .flags = 0, 171 }; 172 173 if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) { 174 error_setg_errno(errp, errno, "Failed to allocate ioas"); 175 return false; 176 } 177 178 *ioas_id = alloc_data.out_ioas_id; 179 trace_iommufd_backend_alloc_ioas(fd, *ioas_id); 180 181 return true; 182 } 183 184 void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) 185 { 186 int ret, fd = be->fd; 187 struct iommu_destroy des = { 188 .size = sizeof(des), 189 .id = id, 190 }; 191 192 ret = ioctl(fd, IOMMU_DESTROY, &des); 193 trace_iommufd_backend_free_id(fd, id, ret); 194 if (ret) { 195 error_report("Failed to free id: %u %m", id); 196 } 197 } 198 199 int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, 200 ram_addr_t size, void *vaddr, bool readonly) 201 { 202 int ret, fd = be->fd; 203 struct iommu_ioas_map map = { 204 .size = sizeof(map), 205 .flags = IOMMU_IOAS_MAP_READABLE | 206 IOMMU_IOAS_MAP_FIXED_IOVA, 207 .ioas_id = ioas_id, 208 .__reserved = 0, 209 .user_va = (uintptr_t)vaddr, 210 .iova = iova, 211 .length = size, 212 }; 213 214 if (!readonly) { 215 map.flags |= IOMMU_IOAS_MAP_WRITEABLE; 216 } 217 218 ret = ioctl(fd, IOMMU_IOAS_MAP, &map); 219 trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, 220 vaddr, readonly, ret); 221 if (ret) { 222 ret = -errno; 223 224 /* TODO: Not support mapping hardware PCI BAR region for now. */ 225 if (errno == EFAULT) { 226 warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); 227 } 228 } 229 return ret; 230 } 231 232 int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, 233 hwaddr iova, ram_addr_t size, 234 int mfd, unsigned long start, bool readonly) 235 { 236 int ret, fd = be->fd; 237 struct iommu_ioas_map_file map = { 238 .size = sizeof(map), 239 .flags = IOMMU_IOAS_MAP_READABLE | 240 IOMMU_IOAS_MAP_FIXED_IOVA, 241 .ioas_id = ioas_id, 242 .fd = mfd, 243 .start = start, 244 .iova = iova, 245 .length = size, 246 }; 247 248 if (cpr_is_incoming()) { 249 return 0; 250 } 251 252 if (!readonly) { 253 map.flags |= IOMMU_IOAS_MAP_WRITEABLE; 254 } 255 256 ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map); 257 trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start, 258 readonly, ret); 259 if (ret) { 260 ret = -errno; 261 262 /* TODO: Not support mapping hardware PCI BAR region for now. */ 263 if (errno == EFAULT) { 264 warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?"); 265 } 266 } 267 return ret; 268 } 269 270 int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, 271 hwaddr iova, ram_addr_t size) 272 { 273 int ret, fd = be->fd; 274 struct iommu_ioas_unmap unmap = { 275 .size = sizeof(unmap), 276 .ioas_id = ioas_id, 277 .iova = iova, 278 .length = size, 279 }; 280 281 if (cpr_is_incoming()) { 282 return 0; 283 } 284 285 ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); 286 /* 287 * IOMMUFD takes mapping as some kind of object, unmapping 288 * nonexistent mapping is treated as deleting a nonexistent 289 * object and return ENOENT. This is different from legacy 290 * backend which allows it. vIOMMU may trigger a lot of 291 * redundant unmapping, to avoid flush the log, treat them 292 * as succeess for IOMMUFD just like legacy backend. 293 */ 294 if (ret && errno == ENOENT) { 295 trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); 296 ret = 0; 297 } else { 298 trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); 299 } 300 301 if (ret) { 302 ret = -errno; 303 } 304 return ret; 305 } 306 307 bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, 308 uint32_t pt_id, uint32_t flags, 309 uint32_t data_type, uint32_t data_len, 310 void *data_ptr, uint32_t *out_hwpt, 311 Error **errp) 312 { 313 int ret, fd = be->fd; 314 struct iommu_hwpt_alloc alloc_hwpt = { 315 .size = sizeof(struct iommu_hwpt_alloc), 316 .flags = flags, 317 .dev_id = dev_id, 318 .pt_id = pt_id, 319 .data_type = data_type, 320 .data_len = data_len, 321 .data_uptr = (uintptr_t)data_ptr, 322 }; 323 324 ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt); 325 trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type, 326 data_len, (uintptr_t)data_ptr, 327 alloc_hwpt.out_hwpt_id, ret); 328 if (ret) { 329 error_setg_errno(errp, errno, "Failed to allocate hwpt"); 330 return false; 331 } 332 333 *out_hwpt = alloc_hwpt.out_hwpt_id; 334 return true; 335 } 336 337 bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, 338 uint32_t hwpt_id, bool start, 339 Error **errp) 340 { 341 int ret; 342 struct iommu_hwpt_set_dirty_tracking set_dirty = { 343 .size = sizeof(set_dirty), 344 .hwpt_id = hwpt_id, 345 .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0, 346 }; 347 348 ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty); 349 trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0); 350 if (ret) { 351 error_setg_errno(errp, errno, 352 "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed", 353 hwpt_id); 354 return false; 355 } 356 357 return true; 358 } 359 360 bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, 361 uint32_t hwpt_id, 362 uint64_t iova, ram_addr_t size, 363 uint64_t page_size, uint64_t *data, 364 Error **errp) 365 { 366 int ret; 367 struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = { 368 .size = sizeof(get_dirty_bitmap), 369 .hwpt_id = hwpt_id, 370 .iova = iova, 371 .length = size, 372 .page_size = page_size, 373 .data = (uintptr_t)data, 374 }; 375 376 ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap); 377 trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size, 378 page_size, ret ? errno : 0); 379 if (ret) { 380 error_setg_errno(errp, errno, 381 "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx 382 " size: 0x"RAM_ADDR_FMT") failed", iova, size); 383 return false; 384 } 385 386 return true; 387 } 388 389 bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, 390 uint32_t *type, void *data, uint32_t len, 391 uint64_t *caps, Error **errp) 392 { 393 struct iommu_hw_info info = { 394 .size = sizeof(info), 395 .dev_id = devid, 396 .data_len = len, 397 .data_uptr = (uintptr_t)data, 398 }; 399 400 if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { 401 error_setg_errno(errp, errno, "Failed to get hardware info"); 402 return false; 403 } 404 405 g_assert(type); 406 *type = info.out_data_type; 407 g_assert(caps); 408 *caps = info.out_capabilities; 409 410 return true; 411 } 412 413 bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, 414 uint32_t data_type, uint32_t entry_len, 415 uint32_t *entry_num, void *data, 416 Error **errp) 417 { 418 int ret, fd = be->fd; 419 uint32_t total_entries = *entry_num; 420 struct iommu_hwpt_invalidate cache = { 421 .size = sizeof(cache), 422 .hwpt_id = id, 423 .data_type = data_type, 424 .entry_len = entry_len, 425 .entry_num = total_entries, 426 .data_uptr = (uintptr_t)data, 427 }; 428 429 ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache); 430 trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len, 431 total_entries, cache.entry_num, 432 (uintptr_t)data, ret ? errno : 0); 433 *entry_num = cache.entry_num; 434 435 if (ret) { 436 error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:" 437 " total %d entries, processed %d entries", 438 total_entries, cache.entry_num); 439 } else if (total_entries != cache.entry_num) { 440 error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed" 441 " entries: total %d entries, processed %d entries." 442 " Kernel BUG?!", total_entries, cache.entry_num); 443 return false; 444 } 445 446 return !ret; 447 } 448 449 bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, 450 uint32_t hwpt_id, Error **errp) 451 { 452 HostIOMMUDeviceIOMMUFDClass *idevc = 453 HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); 454 455 g_assert(idevc->attach_hwpt); 456 return idevc->attach_hwpt(idev, hwpt_id, errp); 457 } 458 459 bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, 460 Error **errp) 461 { 462 HostIOMMUDeviceIOMMUFDClass *idevc = 463 HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); 464 465 g_assert(idevc->detach_hwpt); 466 return idevc->detach_hwpt(idev, errp); 467 } 468 469 static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) 470 { 471 HostIOMMUDeviceCaps *caps = &hiod->caps; 472 473 switch (cap) { 474 case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: 475 return caps->type; 476 case HOST_IOMMU_DEVICE_CAP_AW_BITS: 477 return vfio_device_get_aw_bits(hiod->agent); 478 default: 479 error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); 480 return -EINVAL; 481 } 482 } 483 484 static void hiod_iommufd_class_init(ObjectClass *oc, const void *data) 485 { 486 HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); 487 488 hioc->get_cap = hiod_iommufd_get_cap; 489 }; 490 491 static const TypeInfo types[] = { 492 { 493 .name = TYPE_IOMMUFD_BACKEND, 494 .parent = TYPE_OBJECT, 495 .instance_size = sizeof(IOMMUFDBackend), 496 .instance_init = iommufd_backend_init, 497 .instance_finalize = iommufd_backend_finalize, 498 .class_size = sizeof(IOMMUFDBackendClass), 499 .class_init = iommufd_backend_class_init, 500 .interfaces = (const InterfaceInfo[]) { 501 { TYPE_USER_CREATABLE }, 502 { } 503 } 504 }, { 505 .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, 506 .parent = TYPE_HOST_IOMMU_DEVICE, 507 .instance_size = sizeof(HostIOMMUDeviceIOMMUFD), 508 .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass), 509 .class_init = hiod_iommufd_class_init, 510 .abstract = true, 511 } 512 }; 513 514 DEFINE_TYPES(types) 515