/*
 * low level and IOMMU backend agnostic helpers used by VFIO devices,
 * related to regions, interrupts, capabilities
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"

/*
 * Common VFIO interrupt disable
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}

static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}

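/*
 * Usage sketch (hypothetical caller, not part of this file): route an MSI-X
 * vector "nr" to an eventfd held in an EventNotifier "notifier"; passing
 * fd = -1 instead tears the signaling down:
 *
 *     Error *err = NULL;
 *
 *     if (vfio_set_irq_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
 *                                VFIO_IRQ_SET_ACTION_TRIGGER,
 *                                event_notifier_get_fd(&notifier), &err)) {
 *         error_report_err(err);
 *     }
 */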
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}

/*
 * IO Port/MMIO - Beware of the endianness, VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI. This will
     * do nothing if not pending (including not in INTx mode). We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt. Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

/*
 * Allocate a zeroed bitmap with one bit per host page covering @size bytes;
 * the byte size is rounded up to a whole number of __u64 words.
 */
int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}

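/*
 * Worked example (illustrative only): on a 4 KiB-page host, a 1 MiB range
 * gives vbmap.pages = 256 and vbmap.size = 32 bytes, since 256 bits round
 * up to a whole number of __u64 words. The caller owns the allocation:
 *
 *     VFIOBitmap vbmap;
 *
 *     if (vfio_bitmap_alloc(&vbmap, 1024 * 1024)) {
 *         return -ENOMEM;
 *     }
 *     ...
 *     g_free(vbmap.bitmap);
 */
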
/*
 * Walk the capability chain looking for @id; the chain terminates when a
 * header's next offset is zero, which points back at the buffer start.
 */
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                          struct vfio_region_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_sparse_mmap *sparse;
    int i, j;

    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
    if (!hdr) {
        return -ENODEV;
    }

    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);

    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
                                         region->nr, sparse->nr_areas);

    region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);

    for (i = 0, j = 0; i < sparse->nr_areas; i++) {
        if (sparse->areas[i].size) {
            trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                                sparse->areas[i].offset +
                                                sparse->areas[i].size - 1);
            region->mmaps[j].offset = sparse->areas[i].offset;
            region->mmaps[j].size = sparse->areas[i].size;
            j++;
        }
    }

    region->nr_mmaps = j;
    region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));

    return 0;
}

int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                      int index, const char *name)
{
    struct vfio_region_info *info;
    int ret;

    ret = vfio_get_region_info(vbasedev, index, &info);
    if (ret) {
        return ret;
    }

    region->vbasedev = vbasedev;
    region->flags = info->flags;
    region->size = info->size;
    region->fd_offset = info->offset;
    region->nr = index;

    if (region->size) {
        region->mem = g_new0(MemoryRegion, 1);
        memory_region_init_io(region->mem, obj, &vfio_region_ops,
                              region, name, region->size);

        if (!vbasedev->no_mmap &&
            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {

            ret = vfio_setup_region_sparse_mmaps(region, info);

            if (ret) {
                region->nr_mmaps = 1;
                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
                region->mmaps[0].offset = 0;
                region->mmaps[0].size = region->size;
            }
        }
    }

    g_free(info);

    trace_vfio_region_setup(vbasedev->name, index, name,
                            region->flags, region->fd_offset, region->size);
    return 0;
}

static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}

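/*
 * Typical flow (sketch, assuming a caller owning "obj" and "vbasedev"):
 * describe the region from kernel-reported info with vfio_region_setup(),
 * then try to map its mmap-capable areas below, falling back to slow-path
 * access through vfio_region_ops if the mapping fails:
 *
 *     VFIORegion *region = g_new0(VFIORegion, 1);
 *
 *     if (!vfio_region_setup(obj, vbasedev, region, nr, "some-region") &&
 *         vfio_region_mmap(region)) {
 *         // slow path: accesses go through vfio_region_read/write
 *     }
 */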
int vfio_region_mmap(VFIORegion *region)
{
    int i, prot = 0;
    char *name;

    if (!region->mem) {
        return 0;
    }

    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;

    for (i = 0; i < region->nr_mmaps; i++) {
        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
                                     MAP_SHARED, region->vbasedev->fd,
                                     region->fd_offset +
                                     region->mmaps[i].offset);
        if (region->mmaps[i].mmap == MAP_FAILED) {
            int ret = -errno;

            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
                                         region->fd_offset +
                                         region->mmaps[i].offset,
                                         region->fd_offset +
                                         region->mmaps[i].offset +
                                         region->mmaps[i].size - 1, ret);

            region->mmaps[i].mmap = NULL;

            /* Unwind the areas that were already mapped */
            for (i--; i >= 0; i--) {
                vfio_subregion_unmap(region, i);
            }

            return ret;
        }

        name = g_strdup_printf("%s mmaps[%d]",
                               memory_region_name(region->mem), i);
        memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
                                          memory_region_owner(region->mem),
                                          name, region->mmaps[i].size,
                                          region->mmaps[i].mmap);
        g_free(name);
        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
                                    &region->mmaps[i].mem);

        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
                               region->mmaps[i].offset,
                               region->mmaps[i].offset +
                               region->mmaps[i].size - 1);
    }

    return 0;
}

void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}

void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}

void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
    region->size = 0;
    region->flags = 0;
    region->nr = 0;
}

void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}

int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        /* The kernel reported a larger argsz; grow the buffer and retry */
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}

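/*
 * The caller owns the buffer returned by vfio_get_region_info() above and
 * vfio_get_dev_region_info() below alike (sketch; BAR0 shown as an example
 * index for a PCI device):
 *
 *     struct vfio_region_info *info;
 *
 *     if (!vfio_get_region_info(vbasedev, VFIO_PCI_BAR0_REGION_INDEX,
 *                               &info)) {
 *         ... consume info->size, info->offset, info->flags ...
 *         g_free(info);
 *     }
 */
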
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}

bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}

int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
{
    struct stat st;

    if (vbasedev->fd < 0) {
        if (stat(vbasedev->sysfsdev, &st) < 0) {
            error_setg_errno(errp, errno, "no such host device");
            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
            return -errno;
        }
        /* User may specify a name, e.g.: VFIO platform device */
        if (!vbasedev->name) {
            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
        }
    } else {
        if (!vbasedev->iommufd) {
            error_setg(errp, "Use FD passing only with iommufd backend");
            return -EINVAL;
        }
        /*
         * Give the device an fd-based name so that any function printing
         * vbasedev->name does not break.
         */
        if (!vbasedev->name) {
            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
        }
    }

    return 0;
}

void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
    int fd = monitor_fd_param(monitor_cur(), str, errp);

    if (fd < 0) {
        error_prepend(errp, "Could not parse remote object fd %s: ", str);
        return;
    }
    vbasedev->fd = fd;
}

void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
                      DeviceState *dev, bool ram_discard)
{
    vbasedev->type = type;
    vbasedev->ops = ops;
    vbasedev->dev = dev;
    vbasedev->fd = -1;

    vbasedev->ram_block_discard_allowed = ram_discard;
}
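
/*
 * Instantiation sketch (hypothetical front end): a PCI device front end
 * would seed the common state once at realize time, before attaching the
 * device to an IOMMU backend; "vfio_pci_ops" here names that front end's
 * VFIODeviceOps table:
 *
 *     vfio_device_init(&vdev->vbasedev, VFIO_DEVICE_TYPE_PCI,
 *                      &vfio_pci_ops, DEVICE(vdev), false);
 */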