/*
 * low level and IOMMU backend agnostic helpers used by VFIO devices,
 * related to regions, interrupts, capabilities
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

/*
 * Common VFIO interrupt disable
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}

static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}

int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}
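/*
 * Usage sketch (illustrative, not part of this file): a device backend
 * typically routes an eventfd-backed EventNotifier to an interrupt index
 * with ACTION_TRIGGER, and tears it down by passing fd = -1.  The
 * "notifier" variable and the chosen index are assumptions for the
 * example.
 *
 *   Error *err = NULL;
 *
 *   if (vfio_set_irq_signaling(vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
 *                              VFIO_IRQ_SET_ACTION_TRIGGER,
 *                              event_notifier_get_fd(&notifier), &err)) {
 *       error_reportf_err(err, "%s: ", vbasedev->name);
 *   }
 *   ...
 *   if (vfio_set_irq_signaling(vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
 *                              VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
 *       error_reportf_err(err, "%s: ", vbasedev->name);
 *   }
 */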
/*
 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}
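/*
 * Worked example for the sizing above (assuming a 4 KiB host page size):
 * tracking a 1 GiB range needs pages = 0x40000000 / 0x1000 = 262144 bits.
 * The bit count is rounded up to a multiple of 64 (sizeof(__u64) *
 * BITS_PER_BYTE) and divided by 8, giving vbmap->size = 262144 / 8 =
 * 32768 bytes of bitmap.
 */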
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                          struct vfio_region_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_sparse_mmap *sparse;
    int i, j;

    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
    if (!hdr) {
        return -ENODEV;
    }

    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);

    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
                                         region->nr, sparse->nr_areas);

    region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);

    for (i = 0, j = 0; i < sparse->nr_areas; i++) {
        if (sparse->areas[i].size) {
            trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                                sparse->areas[i].offset +
                                                sparse->areas[i].size - 1);
            region->mmaps[j].offset = sparse->areas[i].offset;
            region->mmaps[j].size = sparse->areas[i].size;
            j++;
        }
    }

    region->nr_mmaps = j;
    region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));

    return 0;
}

int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                      int index, const char *name)
{
    struct vfio_region_info *info;
    int ret;

    ret = vfio_get_region_info(vbasedev, index, &info);
    if (ret) {
        return ret;
    }

    region->vbasedev = vbasedev;
    region->flags = info->flags;
    region->size = info->size;
    region->fd_offset = info->offset;
    region->nr = index;

    if (region->size) {
        region->mem = g_new0(MemoryRegion, 1);
        memory_region_init_io(region->mem, obj, &vfio_region_ops,
                              region, name, region->size);

        if (!vbasedev->no_mmap &&
            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {

            ret = vfio_setup_region_sparse_mmaps(region, info);

            if (ret) {
                region->nr_mmaps = 1;
                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
                region->mmaps[0].offset = 0;
                region->mmaps[0].size = region->size;
            }
        }
    }

    g_free(info);

    trace_vfio_region_setup(vbasedev->name, index, name,
                            region->flags, region->fd_offset, region->size);
    return 0;
}
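/*
 * Typical lifecycle sketch (illustrative; the embedding device, its
 * "bars" array, "parent_mr" container and the "bar" name are assumptions,
 * not part of this file): a device backend sets up a region, maps its
 * mmap'able parts, exposes region->mem, and unwinds in reverse order on
 * unrealize.
 *
 *   VFIORegion *region = &vdev->bars[nr].region;
 *
 *   if (!vfio_region_setup(OBJECT(vdev), vbasedev, region, nr, "bar")) {
 *       if (vfio_region_mmap(region)) {
 *           error_report("%s: failed to mmap region %d",
 *                        vbasedev->name, nr);
 *       }
 *       memory_region_add_subregion(parent_mr, 0, region->mem);
 *   }
 *   ...
 *   vfio_region_exit(region);
 *   vfio_region_finalize(region);
 */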
static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}

int vfio_region_mmap(VFIORegion *region)
{
    int i, prot = 0;
    char *name;

    if (!region->mem) {
        return 0;
    }

    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;

    for (i = 0; i < region->nr_mmaps; i++) {
        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
                                     MAP_SHARED, region->vbasedev->fd,
                                     region->fd_offset +
                                     region->mmaps[i].offset);
        if (region->mmaps[i].mmap == MAP_FAILED) {
            int ret = -errno;

            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
                                         region->fd_offset +
                                         region->mmaps[i].offset,
                                         region->fd_offset +
                                         region->mmaps[i].offset +
                                         region->mmaps[i].size - 1, ret);

            region->mmaps[i].mmap = NULL;

            for (i--; i >= 0; i--) {
                vfio_subregion_unmap(region, i);
            }

            return ret;
        }

        name = g_strdup_printf("%s mmaps[%d]",
                               memory_region_name(region->mem), i);
        memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
                                          memory_region_owner(region->mem),
                                          name, region->mmaps[i].size,
                                          region->mmaps[i].mmap);
        g_free(name);
        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
                                    &region->mmaps[i].mem);

        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
                               region->mmaps[i].offset,
                               region->mmaps[i].offset +
                               region->mmaps[i].size - 1);
    }

    return 0;
}

void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}

void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}

void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
    region->size = 0;
    region->flags = 0;
    region->nr = 0;
}

void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}

int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}
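/*
 * Note on the retry loop above: when the region carries capability chains,
 * the kernel reports the space it actually needs back in argsz, so the
 * buffer is grown and the ioctl reissued.  On success the caller owns
 * *info and must g_free() it.  Sketch of a caller (the PCI config region
 * index is used purely as an example):
 *
 *   struct vfio_region_info *info;
 *
 *   if (!vfio_get_region_info(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
 *                             &info)) {
 *       ... use info->size, info->offset ...
 *       g_free(info);
 *   }
 */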
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}

bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}
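/*
 * Sketch of looking up a device-specific region with
 * vfio_get_dev_region_info(); "region_type" and "region_subtype" stand in
 * for a type/subtype pair from <linux/vfio.h> and are placeholders here.
 * On a match, the region index is available in info->index.
 *
 *   struct vfio_region_info *info;
 *
 *   if (!vfio_get_dev_region_info(vbasedev, region_type, region_subtype,
 *                                 &info)) {
 *       ... set up the region using info->index ...
 *       g_free(info);
 *   }
 */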