1 /* 2 * Virtio PCI Bindings 3 * 4 * Copyright IBM, Corp. 2007 5 * Copyright (c) 2009 CodeSourcery 6 * 7 * Authors: 8 * Anthony Liguori <aliguori@us.ibm.com> 9 * Paul Brook <paul@codesourcery.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2. See 12 * the COPYING file in the top-level directory. 13 * 14 * Contributions after 2012-01-13 are licensed under the terms of the 15 * GNU GPL, version 2 or (at your option) any later version. 16 */ 17 18 #include "qemu/osdep.h" 19 20 #include "exec/memop.h" 21 #include "standard-headers/linux/virtio_pci.h" 22 #include "standard-headers/linux/virtio_ids.h" 23 #include "hw/boards.h" 24 #include "hw/virtio/virtio.h" 25 #include "migration/qemu-file-types.h" 26 #include "hw/pci/pci.h" 27 #include "hw/pci/pci_bus.h" 28 #include "hw/qdev-properties.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "qemu/log.h" 32 #include "qemu/module.h" 33 #include "hw/pci/msi.h" 34 #include "hw/pci/msix.h" 35 #include "hw/loader.h" 36 #include "system/kvm.h" 37 #include "hw/virtio/virtio-pci.h" 38 #include "qemu/range.h" 39 #include "hw/virtio/virtio-bus.h" 40 #include "qapi/visitor.h" 41 #include "system/replay.h" 42 #include "trace.h" 43 44 #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) 45 46 #undef VIRTIO_PCI_CONFIG 47 48 /* The remaining space is defined by each driver as the per-driver 49 * configuration space */ 50 #define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev)) 51 52 static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, 53 VirtIOPCIProxy *dev); 54 static void virtio_pci_reset(DeviceState *qdev); 55 56 /* virtio device */ 57 /* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */ 58 static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d) 59 { 60 return container_of(d, VirtIOPCIProxy, pci_dev.qdev); 61 } 62 63 /* DeviceState to VirtIOPCIProxy. Note: used on datapath, 64 * be careful and test performance if you change this. 65 */ 66 static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d) 67 { 68 return container_of(d, VirtIOPCIProxy, pci_dev.qdev); 69 } 70 71 static void virtio_pci_notify(DeviceState *d, uint16_t vector) 72 { 73 VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d); 74 75 if (msix_enabled(&proxy->pci_dev)) { 76 if (vector != VIRTIO_NO_VECTOR) { 77 msix_notify(&proxy->pci_dev, vector); 78 } 79 } else { 80 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 81 pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1); 82 } 83 } 84 85 static void virtio_pci_save_config(DeviceState *d, QEMUFile *f) 86 { 87 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 88 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 89 90 pci_device_save(&proxy->pci_dev, f); 91 msix_save(&proxy->pci_dev, f); 92 if (msix_present(&proxy->pci_dev)) 93 qemu_put_be16(f, vdev->config_vector); 94 } 95 96 static const VMStateDescription vmstate_virtio_pci_modern_queue_state = { 97 .name = "virtio_pci/modern_queue_state", 98 .version_id = 1, 99 .minimum_version_id = 1, 100 .fields = (const VMStateField[]) { 101 VMSTATE_UINT16(num, VirtIOPCIQueue), 102 VMSTATE_UNUSED(1), /* enabled was stored as be16 */ 103 VMSTATE_BOOL(enabled, VirtIOPCIQueue), 104 VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2), 105 VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2), 106 VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2), 107 VMSTATE_END_OF_LIST() 108 } 109 }; 110 111 static bool virtio_pci_modern_state_needed(void *opaque) 112 { 113 VirtIOPCIProxy *proxy = opaque; 114 115 return virtio_pci_modern(proxy); 116 } 117 118 static const VMStateDescription vmstate_virtio_pci_modern_state_sub = { 119 .name = "virtio_pci/modern_state", 120 .version_id = 1, 121 .minimum_version_id = 1, 122 .needed = &virtio_pci_modern_state_needed, 123 .fields = (const VMStateField[]) { 124 VMSTATE_UINT32(dfselect, VirtIOPCIProxy), 125 VMSTATE_UINT32(gfselect, VirtIOPCIProxy), 126 VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2), 127 VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0, 128 vmstate_virtio_pci_modern_queue_state, 129 VirtIOPCIQueue), 130 VMSTATE_END_OF_LIST() 131 } 132 }; 133 134 static const VMStateDescription vmstate_virtio_pci = { 135 .name = "virtio_pci", 136 .version_id = 1, 137 .minimum_version_id = 1, 138 .fields = (const VMStateField[]) { 139 VMSTATE_END_OF_LIST() 140 }, 141 .subsections = (const VMStateDescription * const []) { 142 &vmstate_virtio_pci_modern_state_sub, 143 NULL 144 } 145 }; 146 147 static bool virtio_pci_has_extra_state(DeviceState *d) 148 { 149 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 150 151 return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; 152 } 153 154 static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) 155 { 156 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 157 158 vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL); 159 } 160 161 static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f) 162 { 163 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 164 165 return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1); 166 } 167 168 static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f) 169 { 170 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 171 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 172 173 if (msix_present(&proxy->pci_dev)) 174 qemu_put_be16(f, virtio_queue_vector(vdev, n)); 175 } 176 177 static int virtio_pci_load_config(DeviceState *d, QEMUFile *f) 178 { 179 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 180 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 181 uint16_t vector; 182 183 int ret; 184 ret = pci_device_load(&proxy->pci_dev, f); 185 if (ret) { 186 return ret; 187 } 188 msix_unuse_all_vectors(&proxy->pci_dev); 189 msix_load(&proxy->pci_dev, f); 190 if (msix_present(&proxy->pci_dev)) { 191 qemu_get_be16s(f, &vector); 192 193 if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) { 194 return -EINVAL; 195 } 196 } else { 197 vector = VIRTIO_NO_VECTOR; 198 } 199 vdev->config_vector = vector; 200 if (vector != VIRTIO_NO_VECTOR) { 201 msix_vector_use(&proxy->pci_dev, vector); 202 } 203 return 0; 204 } 205 206 static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) 207 { 208 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 209 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 210 211 uint16_t vector; 212 if (msix_present(&proxy->pci_dev)) { 213 qemu_get_be16s(f, &vector); 214 if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) { 215 return -EINVAL; 216 } 217 } else { 218 vector = VIRTIO_NO_VECTOR; 219 } 220 virtio_queue_set_vector(vdev, n, vector); 221 if (vector != VIRTIO_NO_VECTOR) { 222 msix_vector_use(&proxy->pci_dev, vector); 223 } 224 225 return 0; 226 } 227 228 typedef struct VirtIOPCIIDInfo { 229 /* virtio id */ 230 uint16_t vdev_id; 231 /* pci device id for the transitional device */ 232 uint16_t trans_devid; 233 uint16_t class_id; 234 } VirtIOPCIIDInfo; 235 236 static const VirtIOPCIIDInfo virtio_pci_id_info[] = { 237 { 238 .vdev_id = VIRTIO_ID_CRYPTO, 239 .class_id = PCI_CLASS_OTHERS, 240 }, { 241 .vdev_id = VIRTIO_ID_FS, 242 .class_id = PCI_CLASS_STORAGE_OTHER, 243 }, { 244 .vdev_id = VIRTIO_ID_NET, 245 .trans_devid = PCI_DEVICE_ID_VIRTIO_NET, 246 .class_id = PCI_CLASS_NETWORK_ETHERNET, 247 }, { 248 .vdev_id = VIRTIO_ID_BLOCK, 249 .trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, 250 .class_id = PCI_CLASS_STORAGE_SCSI, 251 }, { 252 .vdev_id = VIRTIO_ID_CONSOLE, 253 .trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, 254 .class_id = PCI_CLASS_COMMUNICATION_OTHER, 255 }, { 256 .vdev_id = VIRTIO_ID_SCSI, 257 .trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, 258 .class_id = PCI_CLASS_STORAGE_SCSI 259 }, { 260 .vdev_id = VIRTIO_ID_9P, 261 .trans_devid = PCI_DEVICE_ID_VIRTIO_9P, 262 .class_id = PCI_BASE_CLASS_NETWORK, 263 }, { 264 .vdev_id = VIRTIO_ID_BALLOON, 265 .trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, 266 .class_id = PCI_CLASS_OTHERS, 267 }, { 268 .vdev_id = VIRTIO_ID_RNG, 269 .trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, 270 .class_id = PCI_CLASS_OTHERS, 271 }, 272 }; 273 274 static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) 275 { 276 const VirtIOPCIIDInfo *info = NULL; 277 int i; 278 279 for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { 280 if (virtio_pci_id_info[i].vdev_id == vdev_id) { 281 info = &virtio_pci_id_info[i]; 282 break; 283 } 284 } 285 286 if (!info) { 287 /* The device id is invalid or not added to the id_info yet. */ 288 error_report("Invalid virtio device(id %u)", vdev_id); 289 abort(); 290 } 291 292 return info; 293 } 294 295 /* 296 * Get the Transitional Device ID for the specific device, return 297 * zero if the device is non-transitional. 298 */ 299 uint16_t virtio_pci_get_trans_devid(uint16_t device_id) 300 { 301 return virtio_pci_get_id_info(device_id)->trans_devid; 302 } 303 304 /* 305 * Get the Class ID for the specific device. 306 */ 307 uint16_t virtio_pci_get_class_id(uint16_t device_id) 308 { 309 return virtio_pci_get_id_info(device_id)->class_id; 310 } 311 312 static bool virtio_pci_ioeventfd_enabled(DeviceState *d) 313 { 314 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 315 316 return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0; 317 } 318 319 #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 320 321 static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) 322 { 323 return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? 324 QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; 325 } 326 327 static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, 328 int n, bool assign) 329 { 330 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 331 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 332 VirtQueue *vq = virtio_get_queue(vdev, n); 333 bool legacy = virtio_pci_legacy(proxy); 334 bool modern = virtio_pci_modern(proxy); 335 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; 336 MemoryRegion *modern_mr = &proxy->notify.mr; 337 MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr; 338 MemoryRegion *legacy_mr = &proxy->bar; 339 hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) * 340 virtio_get_queue_index(vq); 341 hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY; 342 343 if (assign) { 344 if (modern) { 345 memory_region_add_eventfd(modern_mr, modern_addr, 0, 346 false, n, notifier); 347 if (modern_pio) { 348 memory_region_add_eventfd(modern_notify_mr, 0, 2, 349 true, n, notifier); 350 } 351 } 352 if (legacy) { 353 memory_region_add_eventfd(legacy_mr, legacy_addr, 2, 354 true, n, notifier); 355 } 356 } else { 357 if (modern) { 358 memory_region_del_eventfd(modern_mr, modern_addr, 0, 359 false, n, notifier); 360 if (modern_pio) { 361 memory_region_del_eventfd(modern_notify_mr, 0, 2, 362 true, n, notifier); 363 } 364 } 365 if (legacy) { 366 memory_region_del_eventfd(legacy_mr, legacy_addr, 2, 367 true, n, notifier); 368 } 369 } 370 return 0; 371 } 372 373 static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) 374 { 375 virtio_bus_start_ioeventfd(&proxy->bus); 376 } 377 378 static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) 379 { 380 virtio_bus_stop_ioeventfd(&proxy->bus); 381 } 382 383 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) 384 { 385 VirtIOPCIProxy *proxy = opaque; 386 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 387 uint16_t vector, vq_idx; 388 hwaddr pa; 389 390 switch (addr) { 391 case VIRTIO_PCI_GUEST_FEATURES: 392 /* Guest does not negotiate properly? We have to assume nothing. */ 393 if (val & (1 << VIRTIO_F_BAD_FEATURE)) { 394 val = virtio_bus_get_vdev_bad_features(&proxy->bus); 395 } 396 virtio_set_features(vdev, val); 397 break; 398 case VIRTIO_PCI_QUEUE_PFN: 399 pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; 400 if (pa == 0) { 401 virtio_pci_reset(DEVICE(proxy)); 402 } 403 else 404 virtio_queue_set_addr(vdev, vdev->queue_sel, pa); 405 break; 406 case VIRTIO_PCI_QUEUE_SEL: 407 if (val < VIRTIO_QUEUE_MAX) 408 vdev->queue_sel = val; 409 break; 410 case VIRTIO_PCI_QUEUE_NOTIFY: 411 vq_idx = val; 412 if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) { 413 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { 414 VirtQueue *vq = virtio_get_queue(vdev, vq_idx); 415 416 virtio_queue_set_shadow_avail_idx(vq, val >> 16); 417 } 418 virtio_queue_notify(vdev, vq_idx); 419 } 420 break; 421 case VIRTIO_PCI_STATUS: 422 if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { 423 virtio_pci_stop_ioeventfd(proxy); 424 } 425 426 virtio_set_status(vdev, val & 0xFF); 427 428 if (val & VIRTIO_CONFIG_S_DRIVER_OK) { 429 virtio_pci_start_ioeventfd(proxy); 430 } 431 432 if (vdev->status == 0) { 433 virtio_pci_reset(DEVICE(proxy)); 434 } 435 436 /* Linux before 2.6.34 drives the device without enabling 437 the PCI device bus master bit. Enable it automatically 438 for the guest. This is a PCI spec violation but so is 439 initiating DMA with bus master bit clear. */ 440 if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) { 441 pci_default_write_config(&proxy->pci_dev, PCI_COMMAND, 442 proxy->pci_dev.config[PCI_COMMAND] | 443 PCI_COMMAND_MASTER, 1); 444 } 445 break; 446 case VIRTIO_MSI_CONFIG_VECTOR: 447 if (vdev->config_vector != VIRTIO_NO_VECTOR) { 448 msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); 449 } 450 /* Make it possible for guest to discover an error took place. */ 451 if (val < proxy->nvectors) { 452 msix_vector_use(&proxy->pci_dev, val); 453 } else { 454 val = VIRTIO_NO_VECTOR; 455 } 456 vdev->config_vector = val; 457 break; 458 case VIRTIO_MSI_QUEUE_VECTOR: 459 vector = virtio_queue_vector(vdev, vdev->queue_sel); 460 if (vector != VIRTIO_NO_VECTOR) { 461 msix_vector_unuse(&proxy->pci_dev, vector); 462 } 463 /* Make it possible for guest to discover an error took place. */ 464 if (val < proxy->nvectors) { 465 msix_vector_use(&proxy->pci_dev, val); 466 } else { 467 val = VIRTIO_NO_VECTOR; 468 } 469 virtio_queue_set_vector(vdev, vdev->queue_sel, val); 470 break; 471 default: 472 qemu_log_mask(LOG_GUEST_ERROR, 473 "%s: unexpected address 0x%x value 0x%x\n", 474 __func__, addr, val); 475 break; 476 } 477 } 478 479 static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr) 480 { 481 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 482 uint32_t ret = 0xFFFFFFFF; 483 484 switch (addr) { 485 case VIRTIO_PCI_HOST_FEATURES: 486 ret = vdev->host_features; 487 break; 488 case VIRTIO_PCI_GUEST_FEATURES: 489 ret = vdev->guest_features; 490 break; 491 case VIRTIO_PCI_QUEUE_PFN: 492 ret = virtio_queue_get_addr(vdev, vdev->queue_sel) 493 >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; 494 break; 495 case VIRTIO_PCI_QUEUE_NUM: 496 ret = virtio_queue_get_num(vdev, vdev->queue_sel); 497 break; 498 case VIRTIO_PCI_QUEUE_SEL: 499 ret = vdev->queue_sel; 500 break; 501 case VIRTIO_PCI_STATUS: 502 ret = vdev->status; 503 break; 504 case VIRTIO_PCI_ISR: 505 /* reading from the ISR also clears it. */ 506 ret = qatomic_xchg(&vdev->isr, 0); 507 pci_irq_deassert(&proxy->pci_dev); 508 break; 509 case VIRTIO_MSI_CONFIG_VECTOR: 510 ret = vdev->config_vector; 511 break; 512 case VIRTIO_MSI_QUEUE_VECTOR: 513 ret = virtio_queue_vector(vdev, vdev->queue_sel); 514 break; 515 default: 516 break; 517 } 518 519 return ret; 520 } 521 522 static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, 523 unsigned size) 524 { 525 VirtIOPCIProxy *proxy = opaque; 526 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 527 uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); 528 uint64_t val = 0; 529 530 if (vdev == NULL) { 531 return UINT64_MAX; 532 } 533 534 if (addr < config) { 535 return virtio_ioport_read(proxy, addr); 536 } 537 addr -= config; 538 539 switch (size) { 540 case 1: 541 val = virtio_config_readb(vdev, addr); 542 break; 543 case 2: 544 val = virtio_config_readw(vdev, addr); 545 if (virtio_is_big_endian(vdev)) { 546 val = bswap16(val); 547 } 548 break; 549 case 4: 550 val = virtio_config_readl(vdev, addr); 551 if (virtio_is_big_endian(vdev)) { 552 val = bswap32(val); 553 } 554 break; 555 } 556 return val; 557 } 558 559 static void virtio_pci_config_write(void *opaque, hwaddr addr, 560 uint64_t val, unsigned size) 561 { 562 VirtIOPCIProxy *proxy = opaque; 563 uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); 564 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 565 566 if (vdev == NULL) { 567 return; 568 } 569 570 if (addr < config) { 571 virtio_ioport_write(proxy, addr, val); 572 return; 573 } 574 addr -= config; 575 /* 576 * Virtio-PCI is odd. Ioports are LE but config space is target native 577 * endian. 578 */ 579 switch (size) { 580 case 1: 581 virtio_config_writeb(vdev, addr, val); 582 break; 583 case 2: 584 if (virtio_is_big_endian(vdev)) { 585 val = bswap16(val); 586 } 587 virtio_config_writew(vdev, addr, val); 588 break; 589 case 4: 590 if (virtio_is_big_endian(vdev)) { 591 val = bswap32(val); 592 } 593 virtio_config_writel(vdev, addr, val); 594 break; 595 } 596 } 597 598 static const MemoryRegionOps virtio_pci_config_ops = { 599 .read = virtio_pci_config_read, 600 .write = virtio_pci_config_write, 601 .impl = { 602 .min_access_size = 1, 603 .max_access_size = 4, 604 }, 605 .endianness = DEVICE_LITTLE_ENDIAN, 606 }; 607 608 static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy, 609 hwaddr *off, int len) 610 { 611 int i; 612 VirtIOPCIRegion *reg; 613 614 for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) { 615 reg = &proxy->regs[i]; 616 if (*off >= reg->offset && 617 *off + len <= reg->offset + reg->size) { 618 MemoryRegionSection mrs = memory_region_find(®->mr, 619 *off - reg->offset, len); 620 assert(mrs.mr); 621 *off = mrs.offset_within_region; 622 memory_region_unref(mrs.mr); 623 return mrs.mr; 624 } 625 } 626 627 return NULL; 628 } 629 630 /* Below are generic functions to do memcpy from/to an address space, 631 * without byteswaps, with input validation. 632 * 633 * As regular address_space_* APIs all do some kind of byteswap at least for 634 * some host/target combinations, we are forced to explicitly convert to a 635 * known-endianness integer value. 636 * It doesn't really matter which endian format to go through, so the code 637 * below selects the endian that causes the least amount of work on the given 638 * host. 639 * 640 * Note: host pointer must be aligned. 641 */ 642 static 643 void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr, 644 const uint8_t *buf, int len) 645 { 646 uint64_t val; 647 MemoryRegion *mr; 648 649 /* address_space_* APIs assume an aligned address. 650 * As address is under guest control, handle illegal values. 651 */ 652 addr &= ~(len - 1); 653 654 mr = virtio_address_space_lookup(proxy, &addr, len); 655 if (!mr) { 656 return; 657 } 658 659 /* Make sure caller aligned buf properly */ 660 assert(!(((uintptr_t)buf) & (len - 1))); 661 662 switch (len) { 663 case 1: 664 val = pci_get_byte(buf); 665 break; 666 case 2: 667 val = pci_get_word(buf); 668 break; 669 case 4: 670 val = pci_get_long(buf); 671 break; 672 default: 673 /* As length is under guest control, handle illegal values. */ 674 return; 675 } 676 memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE, 677 MEMTXATTRS_UNSPECIFIED); 678 } 679 680 static void 681 virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr, 682 uint8_t *buf, int len) 683 { 684 uint64_t val; 685 MemoryRegion *mr; 686 687 /* address_space_* APIs assume an aligned address. 688 * As address is under guest control, handle illegal values. 689 */ 690 addr &= ~(len - 1); 691 692 mr = virtio_address_space_lookup(proxy, &addr, len); 693 if (!mr) { 694 return; 695 } 696 697 /* Make sure caller aligned buf properly */ 698 assert(!(((uintptr_t)buf) & (len - 1))); 699 700 memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE, 701 MEMTXATTRS_UNSPECIFIED); 702 switch (len) { 703 case 1: 704 pci_set_byte(buf, val); 705 break; 706 case 2: 707 pci_set_word(buf, val); 708 break; 709 case 4: 710 pci_set_long(buf, val); 711 break; 712 default: 713 /* As length is under guest control, handle illegal values. */ 714 break; 715 } 716 } 717 718 static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable) 719 { 720 VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); 721 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 722 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 723 724 vdev->device_iotlb_enabled = enable; 725 726 if (k->toggle_device_iotlb) { 727 k->toggle_device_iotlb(vdev); 728 } 729 } 730 731 static void pcie_ats_config_write(PCIDevice *dev, uint32_t address, 732 uint32_t val, int len) 733 { 734 uint32_t off; 735 uint16_t ats_cap = dev->exp.ats_cap; 736 737 if (!ats_cap || address < ats_cap) { 738 return; 739 } 740 off = address - ats_cap; 741 if (off >= PCI_EXT_CAP_ATS_SIZEOF) { 742 return; 743 } 744 745 if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) { 746 virtio_pci_ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE)); 747 } 748 } 749 750 static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, 751 uint32_t val, int len) 752 { 753 VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); 754 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 755 struct virtio_pci_cfg_cap *cfg; 756 757 pci_default_write_config(pci_dev, address, val, len); 758 759 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { 760 pcie_cap_flr_write_config(pci_dev, address, val, len); 761 } 762 763 if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { 764 pcie_ats_config_write(pci_dev, address, val, len); 765 } 766 767 if (range_covers_byte(address, len, PCI_COMMAND)) { 768 if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { 769 virtio_set_disabled(vdev, true); 770 virtio_pci_stop_ioeventfd(proxy); 771 virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); 772 } else { 773 virtio_set_disabled(vdev, false); 774 } 775 } 776 777 if (proxy->config_cap && 778 ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, 779 pci_cfg_data), 780 sizeof cfg->pci_cfg_data)) { 781 uint32_t off; 782 uint32_t caplen; 783 784 cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); 785 off = le32_to_cpu(cfg->cap.offset); 786 caplen = le32_to_cpu(cfg->cap.length); 787 788 if (caplen == 1 || caplen == 2 || caplen == 4) { 789 assert(caplen <= sizeof cfg->pci_cfg_data); 790 virtio_address_space_write(proxy, off, cfg->pci_cfg_data, caplen); 791 } 792 } 793 } 794 795 static uint32_t virtio_read_config(PCIDevice *pci_dev, 796 uint32_t address, int len) 797 { 798 VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); 799 struct virtio_pci_cfg_cap *cfg; 800 801 if (proxy->config_cap && 802 ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, 803 pci_cfg_data), 804 sizeof cfg->pci_cfg_data)) { 805 uint32_t off; 806 uint32_t caplen; 807 808 cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); 809 off = le32_to_cpu(cfg->cap.offset); 810 caplen = le32_to_cpu(cfg->cap.length); 811 812 if (caplen == 1 || caplen == 2 || caplen == 4) { 813 assert(caplen <= sizeof cfg->pci_cfg_data); 814 virtio_address_space_read(proxy, off, cfg->pci_cfg_data, caplen); 815 } 816 } 817 818 return pci_default_read_config(pci_dev, address, len); 819 } 820 821 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, 822 unsigned int vector) 823 { 824 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; 825 int ret; 826 827 if (irqfd->users == 0) { 828 KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); 829 ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); 830 if (ret < 0) { 831 return ret; 832 } 833 kvm_irqchip_commit_route_changes(&c); 834 irqfd->virq = ret; 835 } 836 irqfd->users++; 837 return 0; 838 } 839 840 static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, 841 unsigned int vector) 842 { 843 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; 844 if (--irqfd->users == 0) { 845 kvm_irqchip_release_virq(kvm_state, irqfd->virq); 846 } 847 } 848 849 static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, 850 EventNotifier *n, 851 unsigned int vector) 852 { 853 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; 854 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); 855 } 856 857 static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, 858 EventNotifier *n , 859 unsigned int vector) 860 { 861 VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; 862 int ret; 863 864 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); 865 assert(ret == 0); 866 } 867 static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, 868 EventNotifier **n, unsigned int *vector) 869 { 870 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 871 VirtQueue *vq; 872 873 if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) 874 return -1; 875 876 if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { 877 *n = virtio_config_get_guest_notifier(vdev); 878 *vector = vdev->config_vector; 879 } else { 880 if (!virtio_queue_get_num(vdev, queue_no)) { 881 return -1; 882 } 883 *vector = virtio_queue_vector(vdev, queue_no); 884 vq = virtio_get_queue(vdev, queue_no); 885 *n = virtio_queue_get_guest_notifier(vq); 886 } 887 return 0; 888 } 889 890 static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) 891 { 892 unsigned int vector; 893 int ret; 894 EventNotifier *n; 895 PCIDevice *dev = &proxy->pci_dev; 896 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 897 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 898 899 ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); 900 if (ret < 0) { 901 return ret; 902 } 903 if (vector >= msix_nr_vectors_allocated(dev)) { 904 return 0; 905 } 906 ret = kvm_virtio_pci_vq_vector_use(proxy, vector); 907 if (ret < 0) { 908 return ret; 909 } 910 /* 911 * If guest supports masking, set up irqfd now. 912 * Otherwise, delay until unmasked in the frontend. 913 */ 914 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { 915 ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); 916 if (ret < 0) { 917 kvm_virtio_pci_vq_vector_release(proxy, vector); 918 return ret; 919 } 920 } 921 922 return 0; 923 } 924 static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) 925 { 926 int queue_no; 927 int ret = 0; 928 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 929 930 for (queue_no = 0; queue_no < nvqs; queue_no++) { 931 if (!virtio_queue_get_num(vdev, queue_no)) { 932 return -1; 933 } 934 ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); 935 } 936 return ret; 937 } 938 939 static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) 940 { 941 return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); 942 } 943 944 static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, 945 int queue_no) 946 { 947 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 948 unsigned int vector; 949 EventNotifier *n; 950 int ret; 951 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 952 PCIDevice *dev = &proxy->pci_dev; 953 954 ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); 955 if (ret < 0) { 956 return; 957 } 958 if (vector >= msix_nr_vectors_allocated(dev)) { 959 return; 960 } 961 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { 962 kvm_virtio_pci_irqfd_release(proxy, n, vector); 963 } 964 kvm_virtio_pci_vq_vector_release(proxy, vector); 965 } 966 967 static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) 968 { 969 int queue_no; 970 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 971 972 for (queue_no = 0; queue_no < nvqs; queue_no++) { 973 if (!virtio_queue_get_num(vdev, queue_no)) { 974 break; 975 } 976 kvm_virtio_pci_vector_release_one(proxy, queue_no); 977 } 978 } 979 980 static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) 981 { 982 kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); 983 } 984 985 static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, 986 unsigned int queue_no, 987 unsigned int vector, 988 MSIMessage msg, 989 EventNotifier *n) 990 { 991 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 992 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 993 VirtIOIRQFD *irqfd; 994 int ret = 0; 995 996 if (proxy->vector_irqfd) { 997 irqfd = &proxy->vector_irqfd[vector]; 998 if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { 999 ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg, 1000 &proxy->pci_dev); 1001 if (ret < 0) { 1002 return ret; 1003 } 1004 kvm_irqchip_commit_routes(kvm_state); 1005 } 1006 } 1007 1008 /* If guest supports masking, irqfd is already setup, unmask it. 1009 * Otherwise, set it up now. 1010 */ 1011 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { 1012 k->guest_notifier_mask(vdev, queue_no, false); 1013 /* Test after unmasking to avoid losing events. */ 1014 if (k->guest_notifier_pending && 1015 k->guest_notifier_pending(vdev, queue_no)) { 1016 event_notifier_set(n); 1017 } 1018 } else { 1019 ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); 1020 } 1021 return ret; 1022 } 1023 1024 static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, 1025 unsigned int queue_no, 1026 unsigned int vector, 1027 EventNotifier *n) 1028 { 1029 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1030 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1031 1032 /* If guest supports masking, keep irqfd but mask it. 1033 * Otherwise, clean it up now. 1034 */ 1035 if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { 1036 k->guest_notifier_mask(vdev, queue_no, true); 1037 } else { 1038 kvm_virtio_pci_irqfd_release(proxy, n, vector); 1039 } 1040 } 1041 1042 static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, 1043 MSIMessage msg) 1044 { 1045 VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); 1046 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1047 VirtQueue *vq = virtio_vector_first_queue(vdev, vector); 1048 EventNotifier *n; 1049 int ret, index, unmasked = 0; 1050 1051 while (vq) { 1052 index = virtio_get_queue_index(vq); 1053 if (!virtio_queue_get_num(vdev, index)) { 1054 break; 1055 } 1056 if (index < proxy->nvqs_with_notifiers) { 1057 n = virtio_queue_get_guest_notifier(vq); 1058 ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); 1059 if (ret < 0) { 1060 goto undo; 1061 } 1062 ++unmasked; 1063 } 1064 vq = virtio_vector_next_queue(vq); 1065 } 1066 /* unmask config intr */ 1067 if (vector == vdev->config_vector) { 1068 n = virtio_config_get_guest_notifier(vdev); 1069 ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, 1070 msg, n); 1071 if (ret < 0) { 1072 goto undo_config; 1073 } 1074 } 1075 return 0; 1076 undo_config: 1077 n = virtio_config_get_guest_notifier(vdev); 1078 virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); 1079 undo: 1080 vq = virtio_vector_first_queue(vdev, vector); 1081 while (vq && unmasked >= 0) { 1082 index = virtio_get_queue_index(vq); 1083 if (index < proxy->nvqs_with_notifiers) { 1084 n = virtio_queue_get_guest_notifier(vq); 1085 virtio_pci_one_vector_mask(proxy, index, vector, n); 1086 --unmasked; 1087 } 1088 vq = virtio_vector_next_queue(vq); 1089 } 1090 return ret; 1091 } 1092 1093 static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) 1094 { 1095 VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); 1096 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1097 VirtQueue *vq = virtio_vector_first_queue(vdev, vector); 1098 EventNotifier *n; 1099 int index; 1100 1101 while (vq) { 1102 index = virtio_get_queue_index(vq); 1103 n = virtio_queue_get_guest_notifier(vq); 1104 if (!virtio_queue_get_num(vdev, index)) { 1105 break; 1106 } 1107 if (index < proxy->nvqs_with_notifiers) { 1108 virtio_pci_one_vector_mask(proxy, index, vector, n); 1109 } 1110 vq = virtio_vector_next_queue(vq); 1111 } 1112 1113 if (vector == vdev->config_vector) { 1114 n = virtio_config_get_guest_notifier(vdev); 1115 virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); 1116 } 1117 } 1118 1119 static void virtio_pci_vector_poll(PCIDevice *dev, 1120 unsigned int vector_start, 1121 unsigned int vector_end) 1122 { 1123 VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); 1124 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1125 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1126 int queue_no; 1127 unsigned int vector; 1128 EventNotifier *notifier; 1129 int ret; 1130 1131 for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { 1132 ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector); 1133 if (ret < 0) { 1134 break; 1135 } 1136 if (vector < vector_start || vector >= vector_end || 1137 !msix_is_masked(dev, vector)) { 1138 continue; 1139 } 1140 if (k->guest_notifier_pending) { 1141 if (k->guest_notifier_pending(vdev, queue_no)) { 1142 msix_set_pending(dev, vector); 1143 } 1144 } else if (event_notifier_test_and_clear(notifier)) { 1145 msix_set_pending(dev, vector); 1146 } 1147 } 1148 /* poll the config intr */ 1149 ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier, 1150 &vector); 1151 if (ret < 0) { 1152 return; 1153 } 1154 if (vector < vector_start || vector >= vector_end || 1155 !msix_is_masked(dev, vector)) { 1156 return; 1157 } 1158 if (k->guest_notifier_pending) { 1159 if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { 1160 msix_set_pending(dev, vector); 1161 } 1162 } else if (event_notifier_test_and_clear(notifier)) { 1163 msix_set_pending(dev, vector); 1164 } 1165 } 1166 1167 void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, 1168 int n, bool assign, 1169 bool with_irqfd) 1170 { 1171 if (n == VIRTIO_CONFIG_IRQ_IDX) { 1172 virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); 1173 } else { 1174 virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); 1175 } 1176 } 1177 1178 static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, 1179 bool with_irqfd) 1180 { 1181 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 1182 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1183 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1184 VirtQueue *vq = NULL; 1185 EventNotifier *notifier = NULL; 1186 1187 if (n == VIRTIO_CONFIG_IRQ_IDX) { 1188 notifier = virtio_config_get_guest_notifier(vdev); 1189 } else { 1190 vq = virtio_get_queue(vdev, n); 1191 notifier = virtio_queue_get_guest_notifier(vq); 1192 } 1193 1194 if (assign) { 1195 int r = event_notifier_init(notifier, 0); 1196 if (r < 0) { 1197 return r; 1198 } 1199 virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); 1200 } else { 1201 virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, 1202 with_irqfd); 1203 event_notifier_cleanup(notifier); 1204 } 1205 1206 if (!msix_enabled(&proxy->pci_dev) && 1207 vdev->use_guest_notifier_mask && 1208 vdc->guest_notifier_mask) { 1209 vdc->guest_notifier_mask(vdev, n, !assign); 1210 } 1211 1212 return 0; 1213 } 1214 1215 static bool virtio_pci_query_guest_notifiers(DeviceState *d) 1216 { 1217 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 1218 1219 if (msix_enabled(&proxy->pci_dev)) { 1220 return true; 1221 } else { 1222 return pci_irq_disabled(&proxy->pci_dev); 1223 } 1224 } 1225 1226 static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) 1227 { 1228 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 1229 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1230 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1231 int r, n; 1232 bool with_irqfd = msix_enabled(&proxy->pci_dev) && 1233 kvm_msi_via_irqfd_enabled(); 1234 1235 nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX); 1236 1237 /* 1238 * When deassigning, pass a consistent nvqs value to avoid leaking 1239 * notifiers. But first check we've actually been configured, exit 1240 * early if we haven't. 1241 */ 1242 if (!assign && !proxy->nvqs_with_notifiers) { 1243 return 0; 1244 } 1245 assert(assign || nvqs == proxy->nvqs_with_notifiers); 1246 1247 proxy->nvqs_with_notifiers = nvqs; 1248 1249 /* Must unset vector notifier while guest notifier is still assigned */ 1250 if ((proxy->vector_irqfd || 1251 (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && 1252 !assign) { 1253 msix_unset_vector_notifiers(&proxy->pci_dev); 1254 if (proxy->vector_irqfd) { 1255 kvm_virtio_pci_vector_vq_release(proxy, nvqs); 1256 kvm_virtio_pci_vector_config_release(proxy); 1257 g_free(proxy->vector_irqfd); 1258 proxy->vector_irqfd = NULL; 1259 } 1260 } 1261 1262 for (n = 0; n < nvqs; n++) { 1263 if (!virtio_queue_get_num(vdev, n)) { 1264 break; 1265 } 1266 1267 r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd); 1268 if (r < 0) { 1269 goto assign_error; 1270 } 1271 } 1272 r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, 1273 with_irqfd); 1274 if (r < 0) { 1275 goto config_assign_error; 1276 } 1277 /* Must set vector notifier after guest notifier has been assigned */ 1278 if ((with_irqfd || 1279 (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && 1280 assign) { 1281 if (with_irqfd) { 1282 proxy->vector_irqfd = 1283 g_malloc0(sizeof(*proxy->vector_irqfd) * 1284 msix_nr_vectors_allocated(&proxy->pci_dev)); 1285 r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); 1286 if (r < 0) { 1287 goto config_assign_error; 1288 } 1289 r = kvm_virtio_pci_vector_config_use(proxy); 1290 if (r < 0) { 1291 goto config_error; 1292 } 1293 } 1294 1295 r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, 1296 virtio_pci_vector_mask, 1297 virtio_pci_vector_poll); 1298 if (r < 0) { 1299 goto notifiers_error; 1300 } 1301 } 1302 1303 return 0; 1304 1305 notifiers_error: 1306 if (with_irqfd) { 1307 assert(assign); 1308 kvm_virtio_pci_vector_vq_release(proxy, nvqs); 1309 } 1310 config_error: 1311 if (with_irqfd) { 1312 kvm_virtio_pci_vector_config_release(proxy); 1313 } 1314 config_assign_error: 1315 virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, 1316 with_irqfd); 1317 assign_error: 1318 /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ 1319 assert(assign); 1320 while (--n >= 0) { 1321 virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd); 1322 } 1323 g_free(proxy->vector_irqfd); 1324 proxy->vector_irqfd = NULL; 1325 return r; 1326 } 1327 1328 static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n, 1329 MemoryRegion *mr, bool assign) 1330 { 1331 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 1332 int offset; 1333 1334 if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) || 1335 virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) { 1336 return -1; 1337 } 1338 1339 if (assign) { 1340 offset = virtio_pci_queue_mem_mult(proxy) * n; 1341 memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1); 1342 } else { 1343 memory_region_del_subregion(&proxy->notify.mr, mr); 1344 } 1345 1346 return 0; 1347 } 1348 1349 static void virtio_pci_vmstate_change(DeviceState *d, bool running) 1350 { 1351 VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); 1352 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1353 1354 if (running) { 1355 /* Old QEMU versions did not set bus master enable on status write. 1356 * Detect DRIVER set and enable it. 1357 */ 1358 if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) && 1359 (vdev->status & VIRTIO_CONFIG_S_DRIVER) && 1360 !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { 1361 pci_default_write_config(&proxy->pci_dev, PCI_COMMAND, 1362 proxy->pci_dev.config[PCI_COMMAND] | 1363 PCI_COMMAND_MASTER, 1); 1364 } 1365 virtio_pci_start_ioeventfd(proxy); 1366 } else { 1367 virtio_pci_stop_ioeventfd(proxy); 1368 } 1369 } 1370 1371 /* 1372 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus. 1373 */ 1374 1375 static int virtio_pci_query_nvectors(DeviceState *d) 1376 { 1377 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1378 1379 return proxy->nvectors; 1380 } 1381 1382 static AddressSpace *virtio_pci_get_dma_as(DeviceState *d) 1383 { 1384 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1385 PCIDevice *dev = &proxy->pci_dev; 1386 1387 return pci_get_address_space(dev); 1388 } 1389 1390 static bool virtio_pci_iommu_enabled(DeviceState *d) 1391 { 1392 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1393 PCIDevice *dev = &proxy->pci_dev; 1394 AddressSpace *dma_as = pci_device_iommu_address_space(dev); 1395 1396 if (dma_as == &address_space_memory) { 1397 return false; 1398 } 1399 1400 return true; 1401 } 1402 1403 static bool virtio_pci_queue_enabled(DeviceState *d, int n) 1404 { 1405 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1406 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1407 1408 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1409 return proxy->vqs[n].enabled; 1410 } 1411 1412 return virtio_queue_enabled_legacy(vdev, n); 1413 } 1414 1415 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, 1416 struct virtio_pci_cap *cap) 1417 { 1418 PCIDevice *dev = &proxy->pci_dev; 1419 int offset; 1420 1421 offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0, 1422 cap->cap_len, &error_abort); 1423 1424 assert(cap->cap_len >= sizeof *cap); 1425 memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len, 1426 cap->cap_len - PCI_CAP_FLAGS); 1427 1428 return offset; 1429 } 1430 1431 static void virtio_pci_set_vector(VirtIODevice *vdev, 1432 VirtIOPCIProxy *proxy, 1433 int queue_no, uint16_t old_vector, 1434 uint16_t new_vector) 1435 { 1436 bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && 1437 msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); 1438 1439 if (new_vector == old_vector) { 1440 return; 1441 } 1442 1443 /* 1444 * If the device uses irqfd and the vector changes after DRIVER_OK is 1445 * set, we need to release the old vector and set up the new one. 1446 * Otherwise just need to set the new vector on the device. 1447 */ 1448 if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) { 1449 kvm_virtio_pci_vector_release_one(proxy, queue_no); 1450 } 1451 /* Set the new vector on the device. */ 1452 if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { 1453 vdev->config_vector = new_vector; 1454 } else { 1455 virtio_queue_set_vector(vdev, queue_no, new_vector); 1456 } 1457 /* If the new vector changed need to set it up. */ 1458 if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) { 1459 kvm_virtio_pci_vector_use_one(proxy, queue_no); 1460 } 1461 } 1462 1463 int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, 1464 uint8_t bar, uint64_t offset, uint64_t length, 1465 uint8_t id) 1466 { 1467 struct virtio_pci_cap64 cap = { 1468 .cap.cap_len = sizeof cap, 1469 .cap.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG, 1470 }; 1471 1472 cap.cap.bar = bar; 1473 cap.cap.length = cpu_to_le32(length); 1474 cap.length_hi = cpu_to_le32(length >> 32); 1475 cap.cap.offset = cpu_to_le32(offset); 1476 cap.offset_hi = cpu_to_le32(offset >> 32); 1477 cap.cap.id = id; 1478 return virtio_pci_add_mem_cap(proxy, &cap.cap); 1479 } 1480 1481 static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, 1482 unsigned size) 1483 { 1484 VirtIOPCIProxy *proxy = opaque; 1485 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1486 uint32_t val = 0; 1487 int i; 1488 1489 if (vdev == NULL) { 1490 return UINT64_MAX; 1491 } 1492 1493 switch (addr) { 1494 case VIRTIO_PCI_COMMON_DFSELECT: 1495 val = proxy->dfselect; 1496 break; 1497 case VIRTIO_PCI_COMMON_DF: 1498 if (proxy->dfselect <= 1) { 1499 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1500 1501 val = (vdev->host_features & ~vdc->legacy_features) >> 1502 (32 * proxy->dfselect); 1503 } 1504 break; 1505 case VIRTIO_PCI_COMMON_GFSELECT: 1506 val = proxy->gfselect; 1507 break; 1508 case VIRTIO_PCI_COMMON_GF: 1509 if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { 1510 val = proxy->guest_features[proxy->gfselect]; 1511 } 1512 break; 1513 case VIRTIO_PCI_COMMON_MSIX: 1514 val = vdev->config_vector; 1515 break; 1516 case VIRTIO_PCI_COMMON_NUMQ: 1517 for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) { 1518 if (virtio_queue_get_num(vdev, i)) { 1519 val = i + 1; 1520 } 1521 } 1522 break; 1523 case VIRTIO_PCI_COMMON_STATUS: 1524 val = vdev->status; 1525 break; 1526 case VIRTIO_PCI_COMMON_CFGGENERATION: 1527 val = vdev->generation; 1528 break; 1529 case VIRTIO_PCI_COMMON_Q_SELECT: 1530 val = vdev->queue_sel; 1531 break; 1532 case VIRTIO_PCI_COMMON_Q_SIZE: 1533 val = virtio_queue_get_num(vdev, vdev->queue_sel); 1534 break; 1535 case VIRTIO_PCI_COMMON_Q_MSIX: 1536 val = virtio_queue_vector(vdev, vdev->queue_sel); 1537 break; 1538 case VIRTIO_PCI_COMMON_Q_ENABLE: 1539 val = proxy->vqs[vdev->queue_sel].enabled; 1540 break; 1541 case VIRTIO_PCI_COMMON_Q_NOFF: 1542 /* Simply map queues in order */ 1543 val = vdev->queue_sel; 1544 break; 1545 case VIRTIO_PCI_COMMON_Q_DESCLO: 1546 val = proxy->vqs[vdev->queue_sel].desc[0]; 1547 break; 1548 case VIRTIO_PCI_COMMON_Q_DESCHI: 1549 val = proxy->vqs[vdev->queue_sel].desc[1]; 1550 break; 1551 case VIRTIO_PCI_COMMON_Q_AVAILLO: 1552 val = proxy->vqs[vdev->queue_sel].avail[0]; 1553 break; 1554 case VIRTIO_PCI_COMMON_Q_AVAILHI: 1555 val = proxy->vqs[vdev->queue_sel].avail[1]; 1556 break; 1557 case VIRTIO_PCI_COMMON_Q_USEDLO: 1558 val = proxy->vqs[vdev->queue_sel].used[0]; 1559 break; 1560 case VIRTIO_PCI_COMMON_Q_USEDHI: 1561 val = proxy->vqs[vdev->queue_sel].used[1]; 1562 break; 1563 case VIRTIO_PCI_COMMON_Q_RESET: 1564 val = proxy->vqs[vdev->queue_sel].reset; 1565 break; 1566 default: 1567 val = 0; 1568 } 1569 1570 return val; 1571 } 1572 1573 static void virtio_pci_common_write(void *opaque, hwaddr addr, 1574 uint64_t val, unsigned size) 1575 { 1576 VirtIOPCIProxy *proxy = opaque; 1577 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1578 uint16_t vector; 1579 1580 if (vdev == NULL) { 1581 return; 1582 } 1583 1584 switch (addr) { 1585 case VIRTIO_PCI_COMMON_DFSELECT: 1586 proxy->dfselect = val; 1587 break; 1588 case VIRTIO_PCI_COMMON_GFSELECT: 1589 proxy->gfselect = val; 1590 break; 1591 case VIRTIO_PCI_COMMON_GF: 1592 if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { 1593 proxy->guest_features[proxy->gfselect] = val; 1594 virtio_set_features(vdev, 1595 (((uint64_t)proxy->guest_features[1]) << 32) | 1596 proxy->guest_features[0]); 1597 } 1598 break; 1599 case VIRTIO_PCI_COMMON_MSIX: 1600 if (vdev->config_vector != VIRTIO_NO_VECTOR) { 1601 msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); 1602 } 1603 /* Make it possible for guest to discover an error took place. */ 1604 if (val < proxy->nvectors) { 1605 msix_vector_use(&proxy->pci_dev, val); 1606 } else { 1607 val = VIRTIO_NO_VECTOR; 1608 } 1609 virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX, 1610 vdev->config_vector, val); 1611 break; 1612 case VIRTIO_PCI_COMMON_STATUS: 1613 if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { 1614 virtio_pci_stop_ioeventfd(proxy); 1615 } 1616 1617 virtio_set_status(vdev, val & 0xFF); 1618 1619 if (val & VIRTIO_CONFIG_S_DRIVER_OK) { 1620 virtio_pci_start_ioeventfd(proxy); 1621 } 1622 1623 if (vdev->status == 0) { 1624 virtio_pci_reset(DEVICE(proxy)); 1625 } 1626 1627 break; 1628 case VIRTIO_PCI_COMMON_Q_SELECT: 1629 if (val < VIRTIO_QUEUE_MAX) { 1630 vdev->queue_sel = val; 1631 } 1632 break; 1633 case VIRTIO_PCI_COMMON_Q_SIZE: 1634 proxy->vqs[vdev->queue_sel].num = val; 1635 virtio_queue_set_num(vdev, vdev->queue_sel, 1636 proxy->vqs[vdev->queue_sel].num); 1637 virtio_init_region_cache(vdev, vdev->queue_sel); 1638 break; 1639 case VIRTIO_PCI_COMMON_Q_MSIX: 1640 vector = virtio_queue_vector(vdev, vdev->queue_sel); 1641 if (vector != VIRTIO_NO_VECTOR) { 1642 msix_vector_unuse(&proxy->pci_dev, vector); 1643 } 1644 /* Make it possible for guest to discover an error took place. */ 1645 if (val < proxy->nvectors) { 1646 msix_vector_use(&proxy->pci_dev, val); 1647 } else { 1648 val = VIRTIO_NO_VECTOR; 1649 } 1650 virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val); 1651 break; 1652 case VIRTIO_PCI_COMMON_Q_ENABLE: 1653 if (val == 1) { 1654 virtio_queue_set_num(vdev, vdev->queue_sel, 1655 proxy->vqs[vdev->queue_sel].num); 1656 virtio_queue_set_rings(vdev, vdev->queue_sel, 1657 ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | 1658 proxy->vqs[vdev->queue_sel].desc[0], 1659 ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | 1660 proxy->vqs[vdev->queue_sel].avail[0], 1661 ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | 1662 proxy->vqs[vdev->queue_sel].used[0]); 1663 proxy->vqs[vdev->queue_sel].enabled = 1; 1664 proxy->vqs[vdev->queue_sel].reset = 0; 1665 virtio_queue_enable(vdev, vdev->queue_sel); 1666 } else { 1667 virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); 1668 } 1669 break; 1670 case VIRTIO_PCI_COMMON_Q_DESCLO: 1671 proxy->vqs[vdev->queue_sel].desc[0] = val; 1672 break; 1673 case VIRTIO_PCI_COMMON_Q_DESCHI: 1674 proxy->vqs[vdev->queue_sel].desc[1] = val; 1675 break; 1676 case VIRTIO_PCI_COMMON_Q_AVAILLO: 1677 proxy->vqs[vdev->queue_sel].avail[0] = val; 1678 break; 1679 case VIRTIO_PCI_COMMON_Q_AVAILHI: 1680 proxy->vqs[vdev->queue_sel].avail[1] = val; 1681 break; 1682 case VIRTIO_PCI_COMMON_Q_USEDLO: 1683 proxy->vqs[vdev->queue_sel].used[0] = val; 1684 break; 1685 case VIRTIO_PCI_COMMON_Q_USEDHI: 1686 proxy->vqs[vdev->queue_sel].used[1] = val; 1687 break; 1688 case VIRTIO_PCI_COMMON_Q_RESET: 1689 if (val == 1) { 1690 proxy->vqs[vdev->queue_sel].reset = 1; 1691 1692 virtio_queue_reset(vdev, vdev->queue_sel); 1693 1694 proxy->vqs[vdev->queue_sel].reset = 0; 1695 proxy->vqs[vdev->queue_sel].enabled = 0; 1696 } 1697 break; 1698 default: 1699 break; 1700 } 1701 } 1702 1703 1704 static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr, 1705 unsigned size) 1706 { 1707 VirtIOPCIProxy *proxy = opaque; 1708 if (virtio_bus_get_device(&proxy->bus) == NULL) { 1709 return UINT64_MAX; 1710 } 1711 1712 return 0; 1713 } 1714 1715 static void virtio_pci_notify_write(void *opaque, hwaddr addr, 1716 uint64_t val, unsigned size) 1717 { 1718 VirtIOPCIProxy *proxy = opaque; 1719 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1720 1721 unsigned queue = addr / virtio_pci_queue_mem_mult(proxy); 1722 1723 if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { 1724 trace_virtio_pci_notify_write(addr, val, size); 1725 virtio_queue_notify(vdev, queue); 1726 } 1727 } 1728 1729 static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr, 1730 uint64_t val, unsigned size) 1731 { 1732 VirtIOPCIProxy *proxy = opaque; 1733 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1734 1735 unsigned queue = val; 1736 1737 if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { 1738 trace_virtio_pci_notify_write_pio(addr, val, size); 1739 virtio_queue_notify(vdev, queue); 1740 } 1741 } 1742 1743 static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, 1744 unsigned size) 1745 { 1746 VirtIOPCIProxy *proxy = opaque; 1747 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1748 uint64_t val; 1749 1750 if (vdev == NULL) { 1751 return UINT64_MAX; 1752 } 1753 1754 val = qatomic_xchg(&vdev->isr, 0); 1755 pci_irq_deassert(&proxy->pci_dev); 1756 return val; 1757 } 1758 1759 static void virtio_pci_isr_write(void *opaque, hwaddr addr, 1760 uint64_t val, unsigned size) 1761 { 1762 } 1763 1764 static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr, 1765 unsigned size) 1766 { 1767 VirtIOPCIProxy *proxy = opaque; 1768 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1769 uint64_t val; 1770 1771 if (vdev == NULL) { 1772 return UINT64_MAX; 1773 } 1774 1775 switch (size) { 1776 case 1: 1777 val = virtio_config_modern_readb(vdev, addr); 1778 break; 1779 case 2: 1780 val = virtio_config_modern_readw(vdev, addr); 1781 break; 1782 case 4: 1783 val = virtio_config_modern_readl(vdev, addr); 1784 break; 1785 default: 1786 val = 0; 1787 break; 1788 } 1789 return val; 1790 } 1791 1792 static void virtio_pci_device_write(void *opaque, hwaddr addr, 1793 uint64_t val, unsigned size) 1794 { 1795 VirtIOPCIProxy *proxy = opaque; 1796 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1797 1798 if (vdev == NULL) { 1799 return; 1800 } 1801 1802 switch (size) { 1803 case 1: 1804 virtio_config_modern_writeb(vdev, addr, val); 1805 break; 1806 case 2: 1807 virtio_config_modern_writew(vdev, addr, val); 1808 break; 1809 case 4: 1810 virtio_config_modern_writel(vdev, addr, val); 1811 break; 1812 } 1813 } 1814 1815 static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy, 1816 const char *vdev_name) 1817 { 1818 static const MemoryRegionOps common_ops = { 1819 .read = virtio_pci_common_read, 1820 .write = virtio_pci_common_write, 1821 .impl = { 1822 .min_access_size = 1, 1823 .max_access_size = 4, 1824 }, 1825 .endianness = DEVICE_LITTLE_ENDIAN, 1826 }; 1827 static const MemoryRegionOps isr_ops = { 1828 .read = virtio_pci_isr_read, 1829 .write = virtio_pci_isr_write, 1830 .impl = { 1831 .min_access_size = 1, 1832 .max_access_size = 4, 1833 }, 1834 .endianness = DEVICE_LITTLE_ENDIAN, 1835 }; 1836 static const MemoryRegionOps device_ops = { 1837 .read = virtio_pci_device_read, 1838 .write = virtio_pci_device_write, 1839 .impl = { 1840 .min_access_size = 1, 1841 .max_access_size = 4, 1842 }, 1843 .endianness = DEVICE_LITTLE_ENDIAN, 1844 }; 1845 static const MemoryRegionOps notify_ops = { 1846 .read = virtio_pci_notify_read, 1847 .write = virtio_pci_notify_write, 1848 .impl = { 1849 .min_access_size = 1, 1850 .max_access_size = 4, 1851 }, 1852 .endianness = DEVICE_LITTLE_ENDIAN, 1853 }; 1854 static const MemoryRegionOps notify_pio_ops = { 1855 .read = virtio_pci_notify_read, 1856 .write = virtio_pci_notify_write_pio, 1857 .impl = { 1858 .min_access_size = 1, 1859 .max_access_size = 4, 1860 }, 1861 .endianness = DEVICE_LITTLE_ENDIAN, 1862 }; 1863 g_autoptr(GString) name = g_string_new(NULL); 1864 1865 g_string_printf(name, "virtio-pci-common-%s", vdev_name); 1866 memory_region_init_io(&proxy->common.mr, OBJECT(proxy), 1867 &common_ops, 1868 proxy, 1869 name->str, 1870 proxy->common.size); 1871 1872 g_string_printf(name, "virtio-pci-isr-%s", vdev_name); 1873 memory_region_init_io(&proxy->isr.mr, OBJECT(proxy), 1874 &isr_ops, 1875 proxy, 1876 name->str, 1877 proxy->isr.size); 1878 1879 g_string_printf(name, "virtio-pci-device-%s", vdev_name); 1880 memory_region_init_io(&proxy->device.mr, OBJECT(proxy), 1881 &device_ops, 1882 proxy, 1883 name->str, 1884 proxy->device.size); 1885 1886 g_string_printf(name, "virtio-pci-notify-%s", vdev_name); 1887 memory_region_init_io(&proxy->notify.mr, OBJECT(proxy), 1888 ¬ify_ops, 1889 proxy, 1890 name->str, 1891 proxy->notify.size); 1892 1893 g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name); 1894 memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy), 1895 ¬ify_pio_ops, 1896 proxy, 1897 name->str, 1898 proxy->notify_pio.size); 1899 } 1900 1901 static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, 1902 VirtIOPCIRegion *region, 1903 struct virtio_pci_cap *cap, 1904 MemoryRegion *mr, 1905 uint8_t bar) 1906 { 1907 memory_region_add_subregion(mr, region->offset, ®ion->mr); 1908 1909 cap->cfg_type = region->type; 1910 cap->bar = bar; 1911 cap->offset = cpu_to_le32(region->offset); 1912 cap->length = cpu_to_le32(region->size); 1913 virtio_pci_add_mem_cap(proxy, cap); 1914 1915 } 1916 1917 static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy, 1918 VirtIOPCIRegion *region, 1919 struct virtio_pci_cap *cap) 1920 { 1921 virtio_pci_modern_region_map(proxy, region, cap, 1922 &proxy->modern_bar, proxy->modern_mem_bar_idx); 1923 } 1924 1925 static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy, 1926 VirtIOPCIRegion *region, 1927 struct virtio_pci_cap *cap) 1928 { 1929 virtio_pci_modern_region_map(proxy, region, cap, 1930 &proxy->io_bar, proxy->modern_io_bar_idx); 1931 } 1932 1933 static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy, 1934 VirtIOPCIRegion *region) 1935 { 1936 memory_region_del_subregion(&proxy->modern_bar, 1937 ®ion->mr); 1938 } 1939 1940 static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy, 1941 VirtIOPCIRegion *region) 1942 { 1943 memory_region_del_subregion(&proxy->io_bar, 1944 ®ion->mr); 1945 } 1946 1947 static void virtio_pci_pre_plugged(DeviceState *d, Error **errp) 1948 { 1949 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1950 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 1951 1952 if (virtio_pci_modern(proxy)) { 1953 virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); 1954 } 1955 1956 virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE); 1957 } 1958 1959 /* This is called by virtio-bus just after the device is plugged. */ 1960 static void virtio_pci_device_plugged(DeviceState *d, Error **errp) 1961 { 1962 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 1963 VirtioBusState *bus = &proxy->bus; 1964 bool legacy = virtio_pci_legacy(proxy); 1965 bool modern; 1966 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; 1967 uint8_t *config; 1968 uint32_t size; 1969 VirtIODevice *vdev = virtio_bus_get_device(bus); 1970 int16_t res; 1971 1972 /* 1973 * Virtio capabilities present without 1974 * VIRTIO_F_VERSION_1 confuses guests 1975 */ 1976 if (!proxy->ignore_backend_features && 1977 !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) { 1978 virtio_pci_disable_modern(proxy); 1979 1980 if (!legacy) { 1981 error_setg(errp, "Device doesn't support modern mode, and legacy" 1982 " mode is disabled"); 1983 error_append_hint(errp, "Set disable-legacy to off\n"); 1984 1985 return; 1986 } 1987 } 1988 1989 modern = virtio_pci_modern(proxy); 1990 1991 config = proxy->pci_dev.config; 1992 if (proxy->class_code) { 1993 pci_config_set_class(config, proxy->class_code); 1994 } 1995 1996 if (legacy) { 1997 if (!virtio_legacy_allowed(vdev)) { 1998 /* 1999 * To avoid migration issues, we allow legacy mode when legacy 2000 * check is disabled in the old machine types (< 5.1). 2001 */ 2002 if (virtio_legacy_check_disabled(vdev)) { 2003 warn_report("device is modern-only, but for backward " 2004 "compatibility legacy is allowed"); 2005 } else { 2006 error_setg(errp, 2007 "device is modern-only, use disable-legacy=on"); 2008 return; 2009 } 2010 } 2011 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { 2012 error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by" 2013 " neither legacy nor transitional device"); 2014 return; 2015 } 2016 /* 2017 * Legacy and transitional devices use specific subsystem IDs. 2018 * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID) 2019 * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 2020 */ 2021 pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); 2022 if (proxy->trans_devid) { 2023 pci_config_set_device_id(config, proxy->trans_devid); 2024 } 2025 } else { 2026 /* pure virtio-1.0 */ 2027 pci_set_word(config + PCI_VENDOR_ID, 2028 PCI_VENDOR_ID_REDHAT_QUMRANET); 2029 pci_set_word(config + PCI_DEVICE_ID, 2030 PCI_DEVICE_ID_VIRTIO_10_BASE + virtio_bus_get_vdev_id(bus)); 2031 pci_config_set_revision(config, 1); 2032 } 2033 config[PCI_INTERRUPT_PIN] = 1; 2034 2035 2036 if (modern) { 2037 struct virtio_pci_cap cap = { 2038 .cap_len = sizeof cap, 2039 }; 2040 struct virtio_pci_notify_cap notify = { 2041 .cap.cap_len = sizeof notify, 2042 .notify_off_multiplier = 2043 cpu_to_le32(virtio_pci_queue_mem_mult(proxy)), 2044 }; 2045 struct virtio_pci_cfg_cap cfg = { 2046 .cap.cap_len = sizeof cfg, 2047 .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG, 2048 }; 2049 struct virtio_pci_notify_cap notify_pio = { 2050 .cap.cap_len = sizeof notify, 2051 .notify_off_multiplier = cpu_to_le32(0x0), 2052 }; 2053 2054 struct virtio_pci_cfg_cap *cfg_mask; 2055 2056 virtio_pci_modern_regions_init(proxy, vdev->name); 2057 2058 virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap); 2059 virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap); 2060 virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap); 2061 virtio_pci_modern_mem_region_map(proxy, &proxy->notify, ¬ify.cap); 2062 2063 if (modern_pio) { 2064 memory_region_init(&proxy->io_bar, OBJECT(proxy), 2065 "virtio-pci-io", 0x4); 2066 address_space_init(&proxy->modern_cfg_io_as, &proxy->io_bar, 2067 "virtio-pci-cfg-io-as"); 2068 2069 pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx, 2070 PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar); 2071 2072 virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio, 2073 ¬ify_pio.cap); 2074 } 2075 2076 pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx, 2077 PCI_BASE_ADDRESS_SPACE_MEMORY | 2078 PCI_BASE_ADDRESS_MEM_PREFETCH | 2079 PCI_BASE_ADDRESS_MEM_TYPE_64, 2080 &proxy->modern_bar); 2081 2082 proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap); 2083 cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap); 2084 pci_set_byte(&cfg_mask->cap.bar, ~0x0); 2085 pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0); 2086 pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0); 2087 pci_set_long(cfg_mask->pci_cfg_data, ~0x0); 2088 } 2089 2090 if (proxy->nvectors) { 2091 int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 2092 proxy->msix_bar_idx, NULL); 2093 if (err) { 2094 /* Notice when a system that supports MSIx can't initialize it */ 2095 if (err != -ENOTSUP) { 2096 warn_report("unable to init msix vectors to %" PRIu32, 2097 proxy->nvectors); 2098 } 2099 proxy->nvectors = 0; 2100 } 2101 } 2102 2103 proxy->pci_dev.config_write = virtio_write_config; 2104 proxy->pci_dev.config_read = virtio_read_config; 2105 2106 if (legacy) { 2107 size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) 2108 + virtio_bus_get_vdev_config_len(bus); 2109 size = pow2ceil(size); 2110 2111 memory_region_init_io(&proxy->bar, OBJECT(proxy), 2112 &virtio_pci_config_ops, 2113 proxy, "virtio-pci", size); 2114 2115 pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, 2116 PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); 2117 } 2118 2119 if (pci_is_vf(&proxy->pci_dev)) { 2120 pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset); 2121 proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF; 2122 } else { 2123 res = pcie_sriov_pf_init_from_user_created_vfs( 2124 &proxy->pci_dev, proxy->last_pcie_cap_offset, errp); 2125 if (res > 0) { 2126 proxy->last_pcie_cap_offset += res; 2127 virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); 2128 } 2129 } 2130 } 2131 2132 static void virtio_pci_device_unplugged(DeviceState *d) 2133 { 2134 VirtIOPCIProxy *proxy = VIRTIO_PCI(d); 2135 bool modern = virtio_pci_modern(proxy); 2136 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; 2137 2138 virtio_pci_stop_ioeventfd(proxy); 2139 2140 if (modern) { 2141 virtio_pci_modern_mem_region_unmap(proxy, &proxy->common); 2142 virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr); 2143 virtio_pci_modern_mem_region_unmap(proxy, &proxy->device); 2144 virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify); 2145 if (modern_pio) { 2146 virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio); 2147 } 2148 } 2149 } 2150 2151 static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) 2152 { 2153 VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); 2154 VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev); 2155 bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && 2156 !pci_bus_is_root(pci_get_bus(pci_dev)); 2157 2158 /* fd-based ioevents can't be synchronized in record/replay */ 2159 if (replay_mode != REPLAY_MODE_NONE) { 2160 proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; 2161 } 2162 2163 /* 2164 * virtio pci bar layout used by default. 2165 * subclasses can re-arrange things if needed. 2166 * 2167 * region 0 -- virtio legacy io bar 2168 * region 1 -- msi-x bar 2169 * region 2 -- virtio modern io bar (off by default) 2170 * region 4+5 -- virtio modern memory (64bit) bar 2171 * 2172 */ 2173 proxy->legacy_io_bar_idx = 0; 2174 proxy->msix_bar_idx = 1; 2175 proxy->modern_io_bar_idx = 2; 2176 proxy->modern_mem_bar_idx = 4; 2177 2178 proxy->common.offset = 0x0; 2179 proxy->common.size = 0x1000; 2180 proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG; 2181 2182 proxy->isr.offset = 0x1000; 2183 proxy->isr.size = 0x1000; 2184 proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG; 2185 2186 proxy->device.offset = 0x2000; 2187 proxy->device.size = 0x1000; 2188 proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG; 2189 2190 proxy->notify.offset = 0x3000; 2191 proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX; 2192 proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG; 2193 2194 proxy->notify_pio.offset = 0x0; 2195 proxy->notify_pio.size = 0x4; 2196 proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG; 2197 2198 /* subclasses can enforce modern, so do this unconditionally */ 2199 memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci", 2200 /* PCI BAR regions must be powers of 2 */ 2201 pow2ceil(proxy->notify.offset + proxy->notify.size)); 2202 2203 address_space_init(&proxy->modern_cfg_mem_as, &proxy->modern_bar, 2204 "virtio-pci-cfg-mem-as"); 2205 2206 if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) { 2207 proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 2208 } 2209 2210 if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) { 2211 error_setg(errp, "device cannot work as neither modern nor legacy mode" 2212 " is enabled"); 2213 error_append_hint(errp, "Set either disable-modern or disable-legacy" 2214 " to off\n"); 2215 return; 2216 } 2217 2218 if (pcie_port && pci_is_express(pci_dev)) { 2219 int pos; 2220 proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; 2221 2222 pos = pcie_endpoint_cap_init(pci_dev, 0); 2223 assert(pos > 0); 2224 2225 pos = pci_pm_init(pci_dev, 0, errp); 2226 if (pos < 0) { 2227 return; 2228 } 2229 2230 /* 2231 * Indicates that this function complies with revision 1.2 of the 2232 * PCI Power Management Interface Specification. 2233 */ 2234 pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); 2235 2236 if (proxy->flags & VIRTIO_PCI_FLAG_AER) { 2237 pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, 2238 PCI_ERR_SIZEOF, NULL); 2239 proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; 2240 } 2241 2242 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { 2243 /* Init error enabling flags */ 2244 pcie_cap_deverr_init(pci_dev); 2245 } 2246 2247 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) { 2248 /* Init Link Control Register */ 2249 pcie_cap_lnkctl_init(pci_dev); 2250 } 2251 2252 if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) { 2253 pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, 2254 PCI_PM_CTRL_NO_SOFT_RESET); 2255 } 2256 2257 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { 2258 /* Init Power Management Control Register */ 2259 pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL, 2260 PCI_PM_CTRL_STATE_MASK); 2261 } 2262 2263 if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { 2264 pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, 2265 proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); 2266 proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; 2267 } 2268 2269 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { 2270 /* Set Function Level Reset capability bit */ 2271 pcie_cap_flr_init(pci_dev); 2272 } 2273 } else { 2274 /* 2275 * make future invocations of pci_is_express() return false 2276 * and pci_config_size() return PCI_CONFIG_SPACE_SIZE. 2277 */ 2278 pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS; 2279 } 2280 2281 virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy); 2282 if (k->realize) { 2283 k->realize(proxy, errp); 2284 } 2285 } 2286 2287 static void virtio_pci_exit(PCIDevice *pci_dev) 2288 { 2289 VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); 2290 bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && 2291 !pci_bus_is_root(pci_get_bus(pci_dev)); 2292 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; 2293 2294 pcie_sriov_pf_exit(&proxy->pci_dev); 2295 msix_uninit_exclusive_bar(pci_dev); 2296 if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && 2297 pci_is_express(pci_dev)) { 2298 pcie_aer_exit(pci_dev); 2299 } 2300 address_space_destroy(&proxy->modern_cfg_mem_as); 2301 if (modern_pio) { 2302 address_space_destroy(&proxy->modern_cfg_io_as); 2303 } 2304 } 2305 2306 static void virtio_pci_reset(DeviceState *qdev) 2307 { 2308 VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); 2309 VirtioBusState *bus = VIRTIO_BUS(&proxy->bus); 2310 int i; 2311 2312 virtio_bus_reset(bus); 2313 msix_unuse_all_vectors(&proxy->pci_dev); 2314 2315 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2316 proxy->vqs[i].enabled = 0; 2317 proxy->vqs[i].reset = 0; 2318 proxy->vqs[i].num = 0; 2319 proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0; 2320 proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0; 2321 proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0; 2322 } 2323 } 2324 2325 static bool virtio_pci_no_soft_reset(PCIDevice *dev) 2326 { 2327 uint16_t pmcsr; 2328 2329 if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) { 2330 return false; 2331 } 2332 2333 pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL); 2334 2335 /* 2336 * When No_Soft_Reset bit is set and the device 2337 * is in D3hot state, don't reset device 2338 */ 2339 return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) && 2340 (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3; 2341 } 2342 2343 static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) 2344 { 2345 PCIDevice *dev = PCI_DEVICE(obj); 2346 DeviceState *qdev = DEVICE(obj); 2347 2348 if (virtio_pci_no_soft_reset(dev)) { 2349 return; 2350 } 2351 2352 virtio_pci_reset(qdev); 2353 2354 if (pci_is_express(dev)) { 2355 VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); 2356 2357 pcie_cap_deverr_reset(dev); 2358 pcie_cap_lnkctl_reset(dev); 2359 2360 if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { 2361 pci_word_test_and_clear_mask( 2362 dev->config + dev->pm_cap + PCI_PM_CTRL, 2363 PCI_PM_CTRL_STATE_MASK); 2364 } 2365 } 2366 } 2367 2368 static const Property virtio_pci_properties[] = { 2369 DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, 2370 VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), 2371 DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, 2372 VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), 2373 DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, 2374 VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), 2375 DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, 2376 VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), 2377 DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, 2378 VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), 2379 DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, 2380 ignore_backend_features, false), 2381 DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags, 2382 VIRTIO_PCI_FLAG_ATS_BIT, false), 2383 DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags, 2384 VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true), 2385 DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags, 2386 VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true), 2387 DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags, 2388 VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true), 2389 DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags, 2390 VIRTIO_PCI_FLAG_INIT_PM_BIT, true), 2391 DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags, 2392 VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false), 2393 DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags, 2394 VIRTIO_PCI_FLAG_INIT_FLR_BIT, true), 2395 DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags, 2396 VIRTIO_PCI_FLAG_AER_BIT, false), 2397 }; 2398 2399 static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) 2400 { 2401 VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev); 2402 VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); 2403 PCIDevice *pci_dev = &proxy->pci_dev; 2404 2405 if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && 2406 virtio_pci_modern(proxy)) { 2407 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 2408 } 2409 2410 vpciklass->parent_dc_realize(qdev, errp); 2411 } 2412 2413 static int virtio_pci_sync_config(DeviceState *dev, Error **errp) 2414 { 2415 VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); 2416 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); 2417 2418 return qdev_sync_config(DEVICE(vdev), errp); 2419 } 2420 2421 static void virtio_pci_class_init(ObjectClass *klass, const void *data) 2422 { 2423 DeviceClass *dc = DEVICE_CLASS(klass); 2424 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 2425 VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass); 2426 ResettableClass *rc = RESETTABLE_CLASS(klass); 2427 2428 device_class_set_props(dc, virtio_pci_properties); 2429 k->realize = virtio_pci_realize; 2430 k->exit = virtio_pci_exit; 2431 k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; 2432 k->revision = VIRTIO_PCI_ABI_VERSION; 2433 k->class_id = PCI_CLASS_OTHERS; 2434 device_class_set_parent_realize(dc, virtio_pci_dc_realize, 2435 &vpciklass->parent_dc_realize); 2436 rc->phases.hold = virtio_pci_bus_reset_hold; 2437 dc->sync_config = virtio_pci_sync_config; 2438 } 2439 2440 static const TypeInfo virtio_pci_info = { 2441 .name = TYPE_VIRTIO_PCI, 2442 .parent = TYPE_PCI_DEVICE, 2443 .instance_size = sizeof(VirtIOPCIProxy), 2444 .class_init = virtio_pci_class_init, 2445 .class_size = sizeof(VirtioPCIClass), 2446 .abstract = true, 2447 }; 2448 2449 static const Property virtio_pci_generic_properties[] = { 2450 DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, 2451 ON_OFF_AUTO_AUTO), 2452 DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), 2453 }; 2454 2455 static void virtio_pci_base_class_init(ObjectClass *klass, const void *data) 2456 { 2457 const VirtioPCIDeviceTypeInfo *t = data; 2458 if (t->class_init) { 2459 t->class_init(klass, NULL); 2460 } 2461 } 2462 2463 static void virtio_pci_generic_class_init(ObjectClass *klass, const void *data) 2464 { 2465 DeviceClass *dc = DEVICE_CLASS(klass); 2466 2467 device_class_set_props(dc, virtio_pci_generic_properties); 2468 } 2469 2470 static void virtio_pci_transitional_instance_init(Object *obj) 2471 { 2472 VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); 2473 2474 proxy->disable_legacy = ON_OFF_AUTO_OFF; 2475 proxy->disable_modern = false; 2476 } 2477 2478 static void virtio_pci_non_transitional_instance_init(Object *obj) 2479 { 2480 VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); 2481 2482 proxy->disable_legacy = ON_OFF_AUTO_ON; 2483 proxy->disable_modern = false; 2484 } 2485 2486 void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) 2487 { 2488 char *base_name = NULL; 2489 TypeInfo base_type_info = { 2490 .name = t->base_name, 2491 .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI, 2492 .instance_size = t->instance_size, 2493 .instance_init = t->instance_init, 2494 .instance_finalize = t->instance_finalize, 2495 .class_size = t->class_size, 2496 .abstract = true, 2497 .interfaces = t->interfaces, 2498 }; 2499 TypeInfo generic_type_info = { 2500 .name = t->generic_name, 2501 .parent = base_type_info.name, 2502 .class_init = virtio_pci_generic_class_init, 2503 .interfaces = (const InterfaceInfo[]) { 2504 { INTERFACE_PCIE_DEVICE }, 2505 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 2506 { } 2507 }, 2508 }; 2509 2510 if (!base_type_info.name) { 2511 /* No base type -> register a single generic device type */ 2512 /* use intermediate %s-base-type to add generic device props */ 2513 base_name = g_strdup_printf("%s-base-type", t->generic_name); 2514 base_type_info.name = base_name; 2515 base_type_info.class_init = virtio_pci_generic_class_init; 2516 2517 generic_type_info.parent = base_name; 2518 generic_type_info.class_init = virtio_pci_base_class_init; 2519 generic_type_info.class_data = t; 2520 2521 assert(!t->non_transitional_name); 2522 assert(!t->transitional_name); 2523 } else { 2524 base_type_info.class_init = virtio_pci_base_class_init; 2525 base_type_info.class_data = t; 2526 } 2527 2528 type_register_static(&base_type_info); 2529 if (generic_type_info.name) { 2530 type_register_static(&generic_type_info); 2531 } 2532 2533 if (t->non_transitional_name) { 2534 const TypeInfo non_transitional_type_info = { 2535 .name = t->non_transitional_name, 2536 .parent = base_type_info.name, 2537 .instance_init = virtio_pci_non_transitional_instance_init, 2538 .interfaces = (const InterfaceInfo[]) { 2539 { INTERFACE_PCIE_DEVICE }, 2540 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 2541 { } 2542 }, 2543 }; 2544 type_register_static(&non_transitional_type_info); 2545 } 2546 2547 if (t->transitional_name) { 2548 const TypeInfo transitional_type_info = { 2549 .name = t->transitional_name, 2550 .parent = base_type_info.name, 2551 .instance_init = virtio_pci_transitional_instance_init, 2552 .interfaces = (const InterfaceInfo[]) { 2553 /* 2554 * Transitional virtio devices work only as Conventional PCI 2555 * devices because they require PIO ports. 2556 */ 2557 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 2558 { } 2559 }, 2560 }; 2561 type_register_static(&transitional_type_info); 2562 } 2563 g_free(base_name); 2564 } 2565 2566 unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues) 2567 { 2568 /* 2569 * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted 2570 * virtqueue buffers can handle their completion. When a different vCPU 2571 * handles completion it may need to IPI the vCPU that submitted the 2572 * request and this adds overhead. 2573 * 2574 * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in 2575 * guests with very many vCPUs and a device that is only used by a few 2576 * vCPUs. Unfortunately optimizing that case requires manual pinning inside 2577 * the guest, so those users might as well manually set the number of 2578 * queues. There is no upper limit that can be applied automatically and 2579 * doing so arbitrarily would result in a sudden performance drop once the 2580 * threshold number of vCPUs is exceeded. 2581 */ 2582 unsigned num_queues = current_machine->smp.cpus; 2583 2584 /* 2585 * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the 2586 * config change interrupt and the fixed virtqueues must be taken into 2587 * account too. 2588 */ 2589 num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues); 2590 2591 /* 2592 * There is a limit to how many virtqueues a device can have. 2593 */ 2594 return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues); 2595 } 2596 2597 /* virtio-pci-bus */ 2598 2599 static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, 2600 VirtIOPCIProxy *dev) 2601 { 2602 DeviceState *qdev = DEVICE(dev); 2603 char virtio_bus_name[] = "virtio-bus"; 2604 2605 qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name); 2606 } 2607 2608 static void virtio_pci_bus_class_init(ObjectClass *klass, const void *data) 2609 { 2610 BusClass *bus_class = BUS_CLASS(klass); 2611 VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); 2612 bus_class->max_dev = 1; 2613 k->notify = virtio_pci_notify; 2614 k->save_config = virtio_pci_save_config; 2615 k->load_config = virtio_pci_load_config; 2616 k->save_queue = virtio_pci_save_queue; 2617 k->load_queue = virtio_pci_load_queue; 2618 k->save_extra_state = virtio_pci_save_extra_state; 2619 k->load_extra_state = virtio_pci_load_extra_state; 2620 k->has_extra_state = virtio_pci_has_extra_state; 2621 k->query_guest_notifiers = virtio_pci_query_guest_notifiers; 2622 k->set_guest_notifiers = virtio_pci_set_guest_notifiers; 2623 k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr; 2624 k->vmstate_change = virtio_pci_vmstate_change; 2625 k->pre_plugged = virtio_pci_pre_plugged; 2626 k->device_plugged = virtio_pci_device_plugged; 2627 k->device_unplugged = virtio_pci_device_unplugged; 2628 k->query_nvectors = virtio_pci_query_nvectors; 2629 k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled; 2630 k->ioeventfd_assign = virtio_pci_ioeventfd_assign; 2631 k->get_dma_as = virtio_pci_get_dma_as; 2632 k->iommu_enabled = virtio_pci_iommu_enabled; 2633 k->queue_enabled = virtio_pci_queue_enabled; 2634 } 2635 2636 static const TypeInfo virtio_pci_bus_info = { 2637 .name = TYPE_VIRTIO_PCI_BUS, 2638 .parent = TYPE_VIRTIO_BUS, 2639 .instance_size = sizeof(VirtioPCIBusState), 2640 .class_size = sizeof(VirtioPCIBusClass), 2641 .class_init = virtio_pci_bus_class_init, 2642 }; 2643 2644 static void virtio_pci_register_types(void) 2645 { 2646 /* Base types: */ 2647 type_register_static(&virtio_pci_bus_info); 2648 type_register_static(&virtio_pci_info); 2649 } 2650 2651 type_init(virtio_pci_register_types) 2652 2653