1 /* 2 * Virtio Support 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qapi/error.h" 16 #include "cpu.h" 17 #include "trace.h" 18 #include "qemu/error-report.h" 19 #include "qemu/log.h" 20 #include "qemu/main-loop.h" 21 #include "qemu/module.h" 22 #include "hw/virtio/virtio.h" 23 #include "migration/qemu-file-types.h" 24 #include "qemu/atomic.h" 25 #include "hw/virtio/virtio-bus.h" 26 #include "hw/qdev-properties.h" 27 #include "hw/virtio/virtio-access.h" 28 #include "sysemu/dma.h" 29 #include "sysemu/runstate.h" 30 #include "standard-headers/linux/virtio_ids.h" 31 32 /* 33 * The alignment to use between consumer and producer parts of vring. 34 * x86 pagesize again. This is the default, used by transports like PCI 35 * which don't provide a means for the guest to tell the host the alignment. 36 */ 37 #define VIRTIO_PCI_VRING_ALIGN 4096 38 39 typedef struct VRingDesc 40 { 41 uint64_t addr; 42 uint32_t len; 43 uint16_t flags; 44 uint16_t next; 45 } VRingDesc; 46 47 typedef struct VRingPackedDesc { 48 uint64_t addr; 49 uint32_t len; 50 uint16_t id; 51 uint16_t flags; 52 } VRingPackedDesc; 53 54 typedef struct VRingAvail 55 { 56 uint16_t flags; 57 uint16_t idx; 58 uint16_t ring[]; 59 } VRingAvail; 60 61 typedef struct VRingUsedElem 62 { 63 uint32_t id; 64 uint32_t len; 65 } VRingUsedElem; 66 67 typedef struct VRingUsed 68 { 69 uint16_t flags; 70 uint16_t idx; 71 VRingUsedElem ring[]; 72 } VRingUsed; 73 74 typedef struct VRingMemoryRegionCaches { 75 struct rcu_head rcu; 76 MemoryRegionCache desc; 77 MemoryRegionCache avail; 78 MemoryRegionCache used; 79 } VRingMemoryRegionCaches; 80 81 typedef struct VRing 82 { 83 unsigned int num; 84 unsigned int num_default; 85 unsigned int align; 86 hwaddr desc; 87 hwaddr avail; 88 hwaddr used; 89 VRingMemoryRegionCaches *caches; 90 } VRing; 91 92 typedef struct VRingPackedDescEvent { 93 uint16_t off_wrap; 94 uint16_t flags; 95 } VRingPackedDescEvent ; 96 97 struct VirtQueue 98 { 99 VRing vring; 100 VirtQueueElement *used_elems; 101 102 /* Next head to pop */ 103 uint16_t last_avail_idx; 104 bool last_avail_wrap_counter; 105 106 /* Last avail_idx read from VQ. */ 107 uint16_t shadow_avail_idx; 108 bool shadow_avail_wrap_counter; 109 110 uint16_t used_idx; 111 bool used_wrap_counter; 112 113 /* Last used index value we have signalled on */ 114 uint16_t signalled_used; 115 116 /* Last used index value we have signalled on */ 117 bool signalled_used_valid; 118 119 /* Notification enabled? */ 120 bool notification; 121 122 uint16_t queue_index; 123 124 unsigned int inuse; 125 126 uint16_t vector; 127 VirtIOHandleOutput handle_output; 128 VirtIODevice *vdev; 129 EventNotifier guest_notifier; 130 EventNotifier host_notifier; 131 bool host_notifier_enabled; 132 QLIST_ENTRY(VirtQueue) node; 133 }; 134 135 const char *virtio_device_names[] = { 136 [VIRTIO_ID_NET] = "virtio-net", 137 [VIRTIO_ID_BLOCK] = "virtio-blk", 138 [VIRTIO_ID_CONSOLE] = "virtio-serial", 139 [VIRTIO_ID_RNG] = "virtio-rng", 140 [VIRTIO_ID_BALLOON] = "virtio-balloon", 141 [VIRTIO_ID_IOMEM] = "virtio-iomem", 142 [VIRTIO_ID_RPMSG] = "virtio-rpmsg", 143 [VIRTIO_ID_SCSI] = "virtio-scsi", 144 [VIRTIO_ID_9P] = "virtio-9p", 145 [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan", 146 [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial", 147 [VIRTIO_ID_CAIF] = "virtio-caif", 148 [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon", 149 [VIRTIO_ID_GPU] = "virtio-gpu", 150 [VIRTIO_ID_CLOCK] = "virtio-clk", 151 [VIRTIO_ID_INPUT] = "virtio-input", 152 [VIRTIO_ID_VSOCK] = "vhost-vsock", 153 [VIRTIO_ID_CRYPTO] = "virtio-crypto", 154 [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal", 155 [VIRTIO_ID_PSTORE] = "virtio-pstore", 156 [VIRTIO_ID_IOMMU] = "virtio-iommu", 157 [VIRTIO_ID_MEM] = "virtio-mem", 158 [VIRTIO_ID_SOUND] = "virtio-sound", 159 [VIRTIO_ID_FS] = "virtio-user-fs", 160 [VIRTIO_ID_PMEM] = "virtio-pmem", 161 [VIRTIO_ID_RPMB] = "virtio-rpmb", 162 [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim", 163 [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder", 164 [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder", 165 [VIRTIO_ID_SCMI] = "virtio-scmi", 166 [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod", 167 [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c", 168 [VIRTIO_ID_WATCHDOG] = "virtio-watchdog", 169 [VIRTIO_ID_CAN] = "virtio-can", 170 [VIRTIO_ID_DMABUF] = "virtio-dmabuf", 171 [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv", 172 [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol", 173 [VIRTIO_ID_BT] = "virtio-bluetooth", 174 [VIRTIO_ID_GPIO] = "virtio-gpio" 175 }; 176 177 static const char *virtio_id_to_name(uint16_t device_id) 178 { 179 assert(device_id < G_N_ELEMENTS(virtio_device_names)); 180 const char *name = virtio_device_names[device_id]; 181 assert(name != NULL); 182 return name; 183 } 184 185 /* Called within call_rcu(). */ 186 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) 187 { 188 assert(caches != NULL); 189 address_space_cache_destroy(&caches->desc); 190 address_space_cache_destroy(&caches->avail); 191 address_space_cache_destroy(&caches->used); 192 g_free(caches); 193 } 194 195 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq) 196 { 197 VRingMemoryRegionCaches *caches; 198 199 caches = qatomic_read(&vq->vring.caches); 200 qatomic_rcu_set(&vq->vring.caches, NULL); 201 if (caches) { 202 call_rcu(caches, virtio_free_region_cache, rcu); 203 } 204 } 205 206 static void virtio_init_region_cache(VirtIODevice *vdev, int n) 207 { 208 VirtQueue *vq = &vdev->vq[n]; 209 VRingMemoryRegionCaches *old = vq->vring.caches; 210 VRingMemoryRegionCaches *new = NULL; 211 hwaddr addr, size; 212 int64_t len; 213 bool packed; 214 215 216 addr = vq->vring.desc; 217 if (!addr) { 218 goto out_no_cache; 219 } 220 new = g_new0(VRingMemoryRegionCaches, 1); 221 size = virtio_queue_get_desc_size(vdev, n); 222 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 223 true : false; 224 len = address_space_cache_init(&new->desc, vdev->dma_as, 225 addr, size, packed); 226 if (len < size) { 227 virtio_error(vdev, "Cannot map desc"); 228 goto err_desc; 229 } 230 231 size = virtio_queue_get_used_size(vdev, n); 232 len = address_space_cache_init(&new->used, vdev->dma_as, 233 vq->vring.used, size, true); 234 if (len < size) { 235 virtio_error(vdev, "Cannot map used"); 236 goto err_used; 237 } 238 239 size = virtio_queue_get_avail_size(vdev, n); 240 len = address_space_cache_init(&new->avail, vdev->dma_as, 241 vq->vring.avail, size, false); 242 if (len < size) { 243 virtio_error(vdev, "Cannot map avail"); 244 goto err_avail; 245 } 246 247 qatomic_rcu_set(&vq->vring.caches, new); 248 if (old) { 249 call_rcu(old, virtio_free_region_cache, rcu); 250 } 251 return; 252 253 err_avail: 254 address_space_cache_destroy(&new->avail); 255 err_used: 256 address_space_cache_destroy(&new->used); 257 err_desc: 258 address_space_cache_destroy(&new->desc); 259 out_no_cache: 260 g_free(new); 261 virtio_virtqueue_reset_region_cache(vq); 262 } 263 264 /* virt queue functions */ 265 void virtio_queue_update_rings(VirtIODevice *vdev, int n) 266 { 267 VRing *vring = &vdev->vq[n].vring; 268 269 if (!vring->num || !vring->desc || !vring->align) { 270 /* not yet setup -> nothing to do */ 271 return; 272 } 273 vring->avail = vring->desc + vring->num * sizeof(VRingDesc); 274 vring->used = vring_align(vring->avail + 275 offsetof(VRingAvail, ring[vring->num]), 276 vring->align); 277 virtio_init_region_cache(vdev, n); 278 } 279 280 /* Called within rcu_read_lock(). */ 281 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc, 282 MemoryRegionCache *cache, int i) 283 { 284 address_space_read_cached(cache, i * sizeof(VRingDesc), 285 desc, sizeof(VRingDesc)); 286 virtio_tswap64s(vdev, &desc->addr); 287 virtio_tswap32s(vdev, &desc->len); 288 virtio_tswap16s(vdev, &desc->flags); 289 virtio_tswap16s(vdev, &desc->next); 290 } 291 292 static void vring_packed_event_read(VirtIODevice *vdev, 293 MemoryRegionCache *cache, 294 VRingPackedDescEvent *e) 295 { 296 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); 297 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); 298 299 e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); 300 /* Make sure flags is seen before off_wrap */ 301 smp_rmb(); 302 e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); 303 virtio_tswap16s(vdev, &e->flags); 304 } 305 306 static void vring_packed_off_wrap_write(VirtIODevice *vdev, 307 MemoryRegionCache *cache, 308 uint16_t off_wrap) 309 { 310 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); 311 312 virtio_stw_phys_cached(vdev, cache, off, off_wrap); 313 address_space_cache_invalidate(cache, off, sizeof(off_wrap)); 314 } 315 316 static void vring_packed_flags_write(VirtIODevice *vdev, 317 MemoryRegionCache *cache, uint16_t flags) 318 { 319 hwaddr off = offsetof(VRingPackedDescEvent, flags); 320 321 virtio_stw_phys_cached(vdev, cache, off, flags); 322 address_space_cache_invalidate(cache, off, sizeof(flags)); 323 } 324 325 /* Called within rcu_read_lock(). */ 326 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) 327 { 328 return qatomic_rcu_read(&vq->vring.caches); 329 } 330 331 /* Called within rcu_read_lock(). */ 332 static inline uint16_t vring_avail_flags(VirtQueue *vq) 333 { 334 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 335 hwaddr pa = offsetof(VRingAvail, flags); 336 337 if (!caches) { 338 return 0; 339 } 340 341 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 342 } 343 344 /* Called within rcu_read_lock(). */ 345 static inline uint16_t vring_avail_idx(VirtQueue *vq) 346 { 347 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 348 hwaddr pa = offsetof(VRingAvail, idx); 349 350 if (!caches) { 351 return 0; 352 } 353 354 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 355 return vq->shadow_avail_idx; 356 } 357 358 /* Called within rcu_read_lock(). */ 359 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) 360 { 361 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 362 hwaddr pa = offsetof(VRingAvail, ring[i]); 363 364 if (!caches) { 365 return 0; 366 } 367 368 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 369 } 370 371 /* Called within rcu_read_lock(). */ 372 static inline uint16_t vring_get_used_event(VirtQueue *vq) 373 { 374 return vring_avail_ring(vq, vq->vring.num); 375 } 376 377 /* Called within rcu_read_lock(). */ 378 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, 379 int i) 380 { 381 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 382 hwaddr pa = offsetof(VRingUsed, ring[i]); 383 384 if (!caches) { 385 return; 386 } 387 388 virtio_tswap32s(vq->vdev, &uelem->id); 389 virtio_tswap32s(vq->vdev, &uelem->len); 390 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem)); 391 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem)); 392 } 393 394 /* Called within rcu_read_lock(). */ 395 static uint16_t vring_used_idx(VirtQueue *vq) 396 { 397 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 398 hwaddr pa = offsetof(VRingUsed, idx); 399 400 if (!caches) { 401 return 0; 402 } 403 404 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 405 } 406 407 /* Called within rcu_read_lock(). */ 408 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) 409 { 410 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 411 hwaddr pa = offsetof(VRingUsed, idx); 412 413 if (caches) { 414 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); 415 address_space_cache_invalidate(&caches->used, pa, sizeof(val)); 416 } 417 418 vq->used_idx = val; 419 } 420 421 /* Called within rcu_read_lock(). */ 422 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) 423 { 424 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 425 VirtIODevice *vdev = vq->vdev; 426 hwaddr pa = offsetof(VRingUsed, flags); 427 uint16_t flags; 428 429 if (!caches) { 430 return; 431 } 432 433 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 434 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask); 435 address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); 436 } 437 438 /* Called within rcu_read_lock(). */ 439 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) 440 { 441 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 442 VirtIODevice *vdev = vq->vdev; 443 hwaddr pa = offsetof(VRingUsed, flags); 444 uint16_t flags; 445 446 if (!caches) { 447 return; 448 } 449 450 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 451 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask); 452 address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); 453 } 454 455 /* Called within rcu_read_lock(). */ 456 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) 457 { 458 VRingMemoryRegionCaches *caches; 459 hwaddr pa; 460 if (!vq->notification) { 461 return; 462 } 463 464 caches = vring_get_region_caches(vq); 465 if (!caches) { 466 return; 467 } 468 469 pa = offsetof(VRingUsed, ring[vq->vring.num]); 470 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); 471 address_space_cache_invalidate(&caches->used, pa, sizeof(val)); 472 } 473 474 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable) 475 { 476 RCU_READ_LOCK_GUARD(); 477 478 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 479 vring_set_avail_event(vq, vring_avail_idx(vq)); 480 } else if (enable) { 481 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); 482 } else { 483 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); 484 } 485 if (enable) { 486 /* Expose avail event/used flags before caller checks the avail idx. */ 487 smp_mb(); 488 } 489 } 490 491 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) 492 { 493 uint16_t off_wrap; 494 VRingPackedDescEvent e; 495 VRingMemoryRegionCaches *caches; 496 497 RCU_READ_LOCK_GUARD(); 498 caches = vring_get_region_caches(vq); 499 if (!caches) { 500 return; 501 } 502 503 vring_packed_event_read(vq->vdev, &caches->used, &e); 504 505 if (!enable) { 506 e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; 507 } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 508 off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15; 509 vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap); 510 /* Make sure off_wrap is wrote before flags */ 511 smp_wmb(); 512 e.flags = VRING_PACKED_EVENT_FLAG_DESC; 513 } else { 514 e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; 515 } 516 517 vring_packed_flags_write(vq->vdev, &caches->used, e.flags); 518 if (enable) { 519 /* Expose avail event/used flags before caller checks the avail idx. */ 520 smp_mb(); 521 } 522 } 523 524 bool virtio_queue_get_notification(VirtQueue *vq) 525 { 526 return vq->notification; 527 } 528 529 void virtio_queue_set_notification(VirtQueue *vq, int enable) 530 { 531 vq->notification = enable; 532 533 if (!vq->vring.desc) { 534 return; 535 } 536 537 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 538 virtio_queue_packed_set_notification(vq, enable); 539 } else { 540 virtio_queue_split_set_notification(vq, enable); 541 } 542 } 543 544 int virtio_queue_ready(VirtQueue *vq) 545 { 546 return vq->vring.avail != 0; 547 } 548 549 static void vring_packed_desc_read_flags(VirtIODevice *vdev, 550 uint16_t *flags, 551 MemoryRegionCache *cache, 552 int i) 553 { 554 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); 555 556 *flags = virtio_lduw_phys_cached(vdev, cache, off); 557 } 558 559 static void vring_packed_desc_read(VirtIODevice *vdev, 560 VRingPackedDesc *desc, 561 MemoryRegionCache *cache, 562 int i, bool strict_order) 563 { 564 hwaddr off = i * sizeof(VRingPackedDesc); 565 566 vring_packed_desc_read_flags(vdev, &desc->flags, cache, i); 567 568 if (strict_order) { 569 /* Make sure flags is read before the rest fields. */ 570 smp_rmb(); 571 } 572 573 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr), 574 &desc->addr, sizeof(desc->addr)); 575 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id), 576 &desc->id, sizeof(desc->id)); 577 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len), 578 &desc->len, sizeof(desc->len)); 579 virtio_tswap64s(vdev, &desc->addr); 580 virtio_tswap16s(vdev, &desc->id); 581 virtio_tswap32s(vdev, &desc->len); 582 } 583 584 static void vring_packed_desc_write_data(VirtIODevice *vdev, 585 VRingPackedDesc *desc, 586 MemoryRegionCache *cache, 587 int i) 588 { 589 hwaddr off_id = i * sizeof(VRingPackedDesc) + 590 offsetof(VRingPackedDesc, id); 591 hwaddr off_len = i * sizeof(VRingPackedDesc) + 592 offsetof(VRingPackedDesc, len); 593 594 virtio_tswap32s(vdev, &desc->len); 595 virtio_tswap16s(vdev, &desc->id); 596 address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id)); 597 address_space_cache_invalidate(cache, off_id, sizeof(desc->id)); 598 address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len)); 599 address_space_cache_invalidate(cache, off_len, sizeof(desc->len)); 600 } 601 602 static void vring_packed_desc_write_flags(VirtIODevice *vdev, 603 VRingPackedDesc *desc, 604 MemoryRegionCache *cache, 605 int i) 606 { 607 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); 608 609 virtio_stw_phys_cached(vdev, cache, off, desc->flags); 610 address_space_cache_invalidate(cache, off, sizeof(desc->flags)); 611 } 612 613 static void vring_packed_desc_write(VirtIODevice *vdev, 614 VRingPackedDesc *desc, 615 MemoryRegionCache *cache, 616 int i, bool strict_order) 617 { 618 vring_packed_desc_write_data(vdev, desc, cache, i); 619 if (strict_order) { 620 /* Make sure data is wrote before flags. */ 621 smp_wmb(); 622 } 623 vring_packed_desc_write_flags(vdev, desc, cache, i); 624 } 625 626 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter) 627 { 628 bool avail, used; 629 630 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 631 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 632 return (avail != used) && (avail == wrap_counter); 633 } 634 635 /* Fetch avail_idx from VQ memory only when we really need to know if 636 * guest has added some buffers. 637 * Called within rcu_read_lock(). */ 638 static int virtio_queue_empty_rcu(VirtQueue *vq) 639 { 640 if (virtio_device_disabled(vq->vdev)) { 641 return 1; 642 } 643 644 if (unlikely(!vq->vring.avail)) { 645 return 1; 646 } 647 648 if (vq->shadow_avail_idx != vq->last_avail_idx) { 649 return 0; 650 } 651 652 return vring_avail_idx(vq) == vq->last_avail_idx; 653 } 654 655 static int virtio_queue_split_empty(VirtQueue *vq) 656 { 657 bool empty; 658 659 if (virtio_device_disabled(vq->vdev)) { 660 return 1; 661 } 662 663 if (unlikely(!vq->vring.avail)) { 664 return 1; 665 } 666 667 if (vq->shadow_avail_idx != vq->last_avail_idx) { 668 return 0; 669 } 670 671 RCU_READ_LOCK_GUARD(); 672 empty = vring_avail_idx(vq) == vq->last_avail_idx; 673 return empty; 674 } 675 676 /* Called within rcu_read_lock(). */ 677 static int virtio_queue_packed_empty_rcu(VirtQueue *vq) 678 { 679 struct VRingPackedDesc desc; 680 VRingMemoryRegionCaches *cache; 681 682 if (unlikely(!vq->vring.desc)) { 683 return 1; 684 } 685 686 cache = vring_get_region_caches(vq); 687 if (!cache) { 688 return 1; 689 } 690 691 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc, 692 vq->last_avail_idx); 693 694 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter); 695 } 696 697 static int virtio_queue_packed_empty(VirtQueue *vq) 698 { 699 RCU_READ_LOCK_GUARD(); 700 return virtio_queue_packed_empty_rcu(vq); 701 } 702 703 int virtio_queue_empty(VirtQueue *vq) 704 { 705 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 706 return virtio_queue_packed_empty(vq); 707 } else { 708 return virtio_queue_split_empty(vq); 709 } 710 } 711 712 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 713 unsigned int len) 714 { 715 AddressSpace *dma_as = vq->vdev->dma_as; 716 unsigned int offset; 717 int i; 718 719 offset = 0; 720 for (i = 0; i < elem->in_num; i++) { 721 size_t size = MIN(len - offset, elem->in_sg[i].iov_len); 722 723 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base, 724 elem->in_sg[i].iov_len, 725 DMA_DIRECTION_FROM_DEVICE, size); 726 727 offset += size; 728 } 729 730 for (i = 0; i < elem->out_num; i++) 731 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base, 732 elem->out_sg[i].iov_len, 733 DMA_DIRECTION_TO_DEVICE, 734 elem->out_sg[i].iov_len); 735 } 736 737 /* virtqueue_detach_element: 738 * @vq: The #VirtQueue 739 * @elem: The #VirtQueueElement 740 * @len: number of bytes written 741 * 742 * Detach the element from the virtqueue. This function is suitable for device 743 * reset or other situations where a #VirtQueueElement is simply freed and will 744 * not be pushed or discarded. 745 */ 746 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, 747 unsigned int len) 748 { 749 vq->inuse -= elem->ndescs; 750 virtqueue_unmap_sg(vq, elem, len); 751 } 752 753 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num) 754 { 755 vq->last_avail_idx -= num; 756 } 757 758 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num) 759 { 760 if (vq->last_avail_idx < num) { 761 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num; 762 vq->last_avail_wrap_counter ^= 1; 763 } else { 764 vq->last_avail_idx -= num; 765 } 766 } 767 768 /* virtqueue_unpop: 769 * @vq: The #VirtQueue 770 * @elem: The #VirtQueueElement 771 * @len: number of bytes written 772 * 773 * Pretend the most recent element wasn't popped from the virtqueue. The next 774 * call to virtqueue_pop() will refetch the element. 775 */ 776 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, 777 unsigned int len) 778 { 779 780 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 781 virtqueue_packed_rewind(vq, 1); 782 } else { 783 virtqueue_split_rewind(vq, 1); 784 } 785 786 virtqueue_detach_element(vq, elem, len); 787 } 788 789 /* virtqueue_rewind: 790 * @vq: The #VirtQueue 791 * @num: Number of elements to push back 792 * 793 * Pretend that elements weren't popped from the virtqueue. The next 794 * virtqueue_pop() will refetch the oldest element. 795 * 796 * Use virtqueue_unpop() instead if you have a VirtQueueElement. 797 * 798 * Returns: true on success, false if @num is greater than the number of in use 799 * elements. 800 */ 801 bool virtqueue_rewind(VirtQueue *vq, unsigned int num) 802 { 803 if (num > vq->inuse) { 804 return false; 805 } 806 807 vq->inuse -= num; 808 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 809 virtqueue_packed_rewind(vq, num); 810 } else { 811 virtqueue_split_rewind(vq, num); 812 } 813 return true; 814 } 815 816 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, 817 unsigned int len, unsigned int idx) 818 { 819 VRingUsedElem uelem; 820 821 if (unlikely(!vq->vring.used)) { 822 return; 823 } 824 825 idx = (idx + vq->used_idx) % vq->vring.num; 826 827 uelem.id = elem->index; 828 uelem.len = len; 829 vring_used_write(vq, &uelem, idx); 830 } 831 832 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, 833 unsigned int len, unsigned int idx) 834 { 835 vq->used_elems[idx].index = elem->index; 836 vq->used_elems[idx].len = len; 837 vq->used_elems[idx].ndescs = elem->ndescs; 838 } 839 840 static void virtqueue_packed_fill_desc(VirtQueue *vq, 841 const VirtQueueElement *elem, 842 unsigned int idx, 843 bool strict_order) 844 { 845 uint16_t head; 846 VRingMemoryRegionCaches *caches; 847 VRingPackedDesc desc = { 848 .id = elem->index, 849 .len = elem->len, 850 }; 851 bool wrap_counter = vq->used_wrap_counter; 852 853 if (unlikely(!vq->vring.desc)) { 854 return; 855 } 856 857 head = vq->used_idx + idx; 858 if (head >= vq->vring.num) { 859 head -= vq->vring.num; 860 wrap_counter ^= 1; 861 } 862 if (wrap_counter) { 863 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL); 864 desc.flags |= (1 << VRING_PACKED_DESC_F_USED); 865 } else { 866 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); 867 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED); 868 } 869 870 caches = vring_get_region_caches(vq); 871 if (!caches) { 872 return; 873 } 874 875 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order); 876 } 877 878 /* Called within rcu_read_lock(). */ 879 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, 880 unsigned int len, unsigned int idx) 881 { 882 trace_virtqueue_fill(vq, elem, len, idx); 883 884 virtqueue_unmap_sg(vq, elem, len); 885 886 if (virtio_device_disabled(vq->vdev)) { 887 return; 888 } 889 890 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 891 virtqueue_packed_fill(vq, elem, len, idx); 892 } else { 893 virtqueue_split_fill(vq, elem, len, idx); 894 } 895 } 896 897 /* Called within rcu_read_lock(). */ 898 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) 899 { 900 uint16_t old, new; 901 902 if (unlikely(!vq->vring.used)) { 903 return; 904 } 905 906 /* Make sure buffer is written before we update index. */ 907 smp_wmb(); 908 trace_virtqueue_flush(vq, count); 909 old = vq->used_idx; 910 new = old + count; 911 vring_used_idx_set(vq, new); 912 vq->inuse -= count; 913 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) 914 vq->signalled_used_valid = false; 915 } 916 917 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) 918 { 919 unsigned int i, ndescs = 0; 920 921 if (unlikely(!vq->vring.desc)) { 922 return; 923 } 924 925 for (i = 1; i < count; i++) { 926 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false); 927 ndescs += vq->used_elems[i].ndescs; 928 } 929 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true); 930 ndescs += vq->used_elems[0].ndescs; 931 932 vq->inuse -= ndescs; 933 vq->used_idx += ndescs; 934 if (vq->used_idx >= vq->vring.num) { 935 vq->used_idx -= vq->vring.num; 936 vq->used_wrap_counter ^= 1; 937 vq->signalled_used_valid = false; 938 } 939 } 940 941 void virtqueue_flush(VirtQueue *vq, unsigned int count) 942 { 943 if (virtio_device_disabled(vq->vdev)) { 944 vq->inuse -= count; 945 return; 946 } 947 948 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 949 virtqueue_packed_flush(vq, count); 950 } else { 951 virtqueue_split_flush(vq, count); 952 } 953 } 954 955 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, 956 unsigned int len) 957 { 958 RCU_READ_LOCK_GUARD(); 959 virtqueue_fill(vq, elem, len, 0); 960 virtqueue_flush(vq, 1); 961 } 962 963 /* Called within rcu_read_lock(). */ 964 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) 965 { 966 uint16_t num_heads = vring_avail_idx(vq) - idx; 967 968 /* Check it isn't doing very strange things with descriptor numbers. */ 969 if (num_heads > vq->vring.num) { 970 virtio_error(vq->vdev, "Guest moved used index from %u to %u", 971 idx, vq->shadow_avail_idx); 972 return -EINVAL; 973 } 974 /* On success, callers read a descriptor at vq->last_avail_idx. 975 * Make sure descriptor read does not bypass avail index read. */ 976 if (num_heads) { 977 smp_rmb(); 978 } 979 980 return num_heads; 981 } 982 983 /* Called within rcu_read_lock(). */ 984 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, 985 unsigned int *head) 986 { 987 /* Grab the next descriptor number they're advertising, and increment 988 * the index we've seen. */ 989 *head = vring_avail_ring(vq, idx % vq->vring.num); 990 991 /* If their number is silly, that's a fatal mistake. */ 992 if (*head >= vq->vring.num) { 993 virtio_error(vq->vdev, "Guest says index %u is available", *head); 994 return false; 995 } 996 997 return true; 998 } 999 1000 enum { 1001 VIRTQUEUE_READ_DESC_ERROR = -1, 1002 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ 1003 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ 1004 }; 1005 1006 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, 1007 MemoryRegionCache *desc_cache, 1008 unsigned int max, unsigned int *next) 1009 { 1010 /* If this descriptor says it doesn't chain, we're done. */ 1011 if (!(desc->flags & VRING_DESC_F_NEXT)) { 1012 return VIRTQUEUE_READ_DESC_DONE; 1013 } 1014 1015 /* Check they're not leading us off end of descriptors. */ 1016 *next = desc->next; 1017 /* Make sure compiler knows to grab that: we don't want it changing! */ 1018 smp_wmb(); 1019 1020 if (*next >= max) { 1021 virtio_error(vdev, "Desc next is %u", *next); 1022 return VIRTQUEUE_READ_DESC_ERROR; 1023 } 1024 1025 vring_split_desc_read(vdev, desc, desc_cache, *next); 1026 return VIRTQUEUE_READ_DESC_MORE; 1027 } 1028 1029 /* Called within rcu_read_lock(). */ 1030 static void virtqueue_split_get_avail_bytes(VirtQueue *vq, 1031 unsigned int *in_bytes, unsigned int *out_bytes, 1032 unsigned max_in_bytes, unsigned max_out_bytes, 1033 VRingMemoryRegionCaches *caches) 1034 { 1035 VirtIODevice *vdev = vq->vdev; 1036 unsigned int max, idx; 1037 unsigned int total_bufs, in_total, out_total; 1038 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1039 int64_t len = 0; 1040 int rc; 1041 1042 idx = vq->last_avail_idx; 1043 total_bufs = in_total = out_total = 0; 1044 1045 max = vq->vring.num; 1046 1047 while ((rc = virtqueue_num_heads(vq, idx)) > 0) { 1048 MemoryRegionCache *desc_cache = &caches->desc; 1049 unsigned int num_bufs; 1050 VRingDesc desc; 1051 unsigned int i; 1052 1053 num_bufs = total_bufs; 1054 1055 if (!virtqueue_get_head(vq, idx++, &i)) { 1056 goto err; 1057 } 1058 1059 vring_split_desc_read(vdev, &desc, desc_cache, i); 1060 1061 if (desc.flags & VRING_DESC_F_INDIRECT) { 1062 if (!desc.len || (desc.len % sizeof(VRingDesc))) { 1063 virtio_error(vdev, "Invalid size for indirect buffer table"); 1064 goto err; 1065 } 1066 1067 /* If we've got too many, that implies a descriptor loop. */ 1068 if (num_bufs >= max) { 1069 virtio_error(vdev, "Looped descriptor"); 1070 goto err; 1071 } 1072 1073 /* loop over the indirect descriptor table */ 1074 len = address_space_cache_init(&indirect_desc_cache, 1075 vdev->dma_as, 1076 desc.addr, desc.len, false); 1077 desc_cache = &indirect_desc_cache; 1078 if (len < desc.len) { 1079 virtio_error(vdev, "Cannot map indirect buffer"); 1080 goto err; 1081 } 1082 1083 max = desc.len / sizeof(VRingDesc); 1084 num_bufs = i = 0; 1085 vring_split_desc_read(vdev, &desc, desc_cache, i); 1086 } 1087 1088 do { 1089 /* If we've got too many, that implies a descriptor loop. */ 1090 if (++num_bufs > max) { 1091 virtio_error(vdev, "Looped descriptor"); 1092 goto err; 1093 } 1094 1095 if (desc.flags & VRING_DESC_F_WRITE) { 1096 in_total += desc.len; 1097 } else { 1098 out_total += desc.len; 1099 } 1100 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 1101 goto done; 1102 } 1103 1104 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); 1105 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1106 1107 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 1108 goto err; 1109 } 1110 1111 if (desc_cache == &indirect_desc_cache) { 1112 address_space_cache_destroy(&indirect_desc_cache); 1113 total_bufs++; 1114 } else { 1115 total_bufs = num_bufs; 1116 } 1117 } 1118 1119 if (rc < 0) { 1120 goto err; 1121 } 1122 1123 done: 1124 address_space_cache_destroy(&indirect_desc_cache); 1125 if (in_bytes) { 1126 *in_bytes = in_total; 1127 } 1128 if (out_bytes) { 1129 *out_bytes = out_total; 1130 } 1131 return; 1132 1133 err: 1134 in_total = out_total = 0; 1135 goto done; 1136 } 1137 1138 static int virtqueue_packed_read_next_desc(VirtQueue *vq, 1139 VRingPackedDesc *desc, 1140 MemoryRegionCache 1141 *desc_cache, 1142 unsigned int max, 1143 unsigned int *next, 1144 bool indirect) 1145 { 1146 /* If this descriptor says it doesn't chain, we're done. */ 1147 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) { 1148 return VIRTQUEUE_READ_DESC_DONE; 1149 } 1150 1151 ++*next; 1152 if (*next == max) { 1153 if (indirect) { 1154 return VIRTQUEUE_READ_DESC_DONE; 1155 } else { 1156 (*next) -= vq->vring.num; 1157 } 1158 } 1159 1160 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false); 1161 return VIRTQUEUE_READ_DESC_MORE; 1162 } 1163 1164 /* Called within rcu_read_lock(). */ 1165 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, 1166 unsigned int *in_bytes, 1167 unsigned int *out_bytes, 1168 unsigned max_in_bytes, 1169 unsigned max_out_bytes, 1170 VRingMemoryRegionCaches *caches) 1171 { 1172 VirtIODevice *vdev = vq->vdev; 1173 unsigned int max, idx; 1174 unsigned int total_bufs, in_total, out_total; 1175 MemoryRegionCache *desc_cache; 1176 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1177 int64_t len = 0; 1178 VRingPackedDesc desc; 1179 bool wrap_counter; 1180 1181 idx = vq->last_avail_idx; 1182 wrap_counter = vq->last_avail_wrap_counter; 1183 total_bufs = in_total = out_total = 0; 1184 1185 max = vq->vring.num; 1186 1187 for (;;) { 1188 unsigned int num_bufs = total_bufs; 1189 unsigned int i = idx; 1190 int rc; 1191 1192 desc_cache = &caches->desc; 1193 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true); 1194 if (!is_desc_avail(desc.flags, wrap_counter)) { 1195 break; 1196 } 1197 1198 if (desc.flags & VRING_DESC_F_INDIRECT) { 1199 if (desc.len % sizeof(VRingPackedDesc)) { 1200 virtio_error(vdev, "Invalid size for indirect buffer table"); 1201 goto err; 1202 } 1203 1204 /* If we've got too many, that implies a descriptor loop. */ 1205 if (num_bufs >= max) { 1206 virtio_error(vdev, "Looped descriptor"); 1207 goto err; 1208 } 1209 1210 /* loop over the indirect descriptor table */ 1211 len = address_space_cache_init(&indirect_desc_cache, 1212 vdev->dma_as, 1213 desc.addr, desc.len, false); 1214 desc_cache = &indirect_desc_cache; 1215 if (len < desc.len) { 1216 virtio_error(vdev, "Cannot map indirect buffer"); 1217 goto err; 1218 } 1219 1220 max = desc.len / sizeof(VRingPackedDesc); 1221 num_bufs = i = 0; 1222 vring_packed_desc_read(vdev, &desc, desc_cache, i, false); 1223 } 1224 1225 do { 1226 /* If we've got too many, that implies a descriptor loop. */ 1227 if (++num_bufs > max) { 1228 virtio_error(vdev, "Looped descriptor"); 1229 goto err; 1230 } 1231 1232 if (desc.flags & VRING_DESC_F_WRITE) { 1233 in_total += desc.len; 1234 } else { 1235 out_total += desc.len; 1236 } 1237 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 1238 goto done; 1239 } 1240 1241 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, 1242 &i, desc_cache == 1243 &indirect_desc_cache); 1244 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1245 1246 if (desc_cache == &indirect_desc_cache) { 1247 address_space_cache_destroy(&indirect_desc_cache); 1248 total_bufs++; 1249 idx++; 1250 } else { 1251 idx += num_bufs - total_bufs; 1252 total_bufs = num_bufs; 1253 } 1254 1255 if (idx >= vq->vring.num) { 1256 idx -= vq->vring.num; 1257 wrap_counter ^= 1; 1258 } 1259 } 1260 1261 /* Record the index and wrap counter for a kick we want */ 1262 vq->shadow_avail_idx = idx; 1263 vq->shadow_avail_wrap_counter = wrap_counter; 1264 done: 1265 address_space_cache_destroy(&indirect_desc_cache); 1266 if (in_bytes) { 1267 *in_bytes = in_total; 1268 } 1269 if (out_bytes) { 1270 *out_bytes = out_total; 1271 } 1272 return; 1273 1274 err: 1275 in_total = out_total = 0; 1276 goto done; 1277 } 1278 1279 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, 1280 unsigned int *out_bytes, 1281 unsigned max_in_bytes, unsigned max_out_bytes) 1282 { 1283 uint16_t desc_size; 1284 VRingMemoryRegionCaches *caches; 1285 1286 RCU_READ_LOCK_GUARD(); 1287 1288 if (unlikely(!vq->vring.desc)) { 1289 goto err; 1290 } 1291 1292 caches = vring_get_region_caches(vq); 1293 if (!caches) { 1294 goto err; 1295 } 1296 1297 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 1298 sizeof(VRingPackedDesc) : sizeof(VRingDesc); 1299 if (caches->desc.len < vq->vring.num * desc_size) { 1300 virtio_error(vq->vdev, "Cannot map descriptor ring"); 1301 goto err; 1302 } 1303 1304 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 1305 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, 1306 max_in_bytes, max_out_bytes, 1307 caches); 1308 } else { 1309 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, 1310 max_in_bytes, max_out_bytes, 1311 caches); 1312 } 1313 1314 return; 1315 err: 1316 if (in_bytes) { 1317 *in_bytes = 0; 1318 } 1319 if (out_bytes) { 1320 *out_bytes = 0; 1321 } 1322 } 1323 1324 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, 1325 unsigned int out_bytes) 1326 { 1327 unsigned int in_total, out_total; 1328 1329 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); 1330 return in_bytes <= in_total && out_bytes <= out_total; 1331 } 1332 1333 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, 1334 hwaddr *addr, struct iovec *iov, 1335 unsigned int max_num_sg, bool is_write, 1336 hwaddr pa, size_t sz) 1337 { 1338 bool ok = false; 1339 unsigned num_sg = *p_num_sg; 1340 assert(num_sg <= max_num_sg); 1341 1342 if (!sz) { 1343 virtio_error(vdev, "virtio: zero sized buffers are not allowed"); 1344 goto out; 1345 } 1346 1347 while (sz) { 1348 hwaddr len = sz; 1349 1350 if (num_sg == max_num_sg) { 1351 virtio_error(vdev, "virtio: too many write descriptors in " 1352 "indirect table"); 1353 goto out; 1354 } 1355 1356 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len, 1357 is_write ? 1358 DMA_DIRECTION_FROM_DEVICE : 1359 DMA_DIRECTION_TO_DEVICE, 1360 MEMTXATTRS_UNSPECIFIED); 1361 if (!iov[num_sg].iov_base) { 1362 virtio_error(vdev, "virtio: bogus descriptor or out of resources"); 1363 goto out; 1364 } 1365 1366 iov[num_sg].iov_len = len; 1367 addr[num_sg] = pa; 1368 1369 sz -= len; 1370 pa += len; 1371 num_sg++; 1372 } 1373 ok = true; 1374 1375 out: 1376 *p_num_sg = num_sg; 1377 return ok; 1378 } 1379 1380 /* Only used by error code paths before we have a VirtQueueElement (therefore 1381 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to 1382 * yet. 1383 */ 1384 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, 1385 struct iovec *iov) 1386 { 1387 unsigned int i; 1388 1389 for (i = 0; i < out_num + in_num; i++) { 1390 int is_write = i >= out_num; 1391 1392 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0); 1393 iov++; 1394 } 1395 } 1396 1397 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, 1398 hwaddr *addr, unsigned int num_sg, 1399 bool is_write) 1400 { 1401 unsigned int i; 1402 hwaddr len; 1403 1404 for (i = 0; i < num_sg; i++) { 1405 len = sg[i].iov_len; 1406 sg[i].iov_base = dma_memory_map(vdev->dma_as, 1407 addr[i], &len, is_write ? 1408 DMA_DIRECTION_FROM_DEVICE : 1409 DMA_DIRECTION_TO_DEVICE, 1410 MEMTXATTRS_UNSPECIFIED); 1411 if (!sg[i].iov_base) { 1412 error_report("virtio: error trying to map MMIO memory"); 1413 exit(1); 1414 } 1415 if (len != sg[i].iov_len) { 1416 error_report("virtio: unexpected memory split"); 1417 exit(1); 1418 } 1419 } 1420 } 1421 1422 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem) 1423 { 1424 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true); 1425 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 1426 false); 1427 } 1428 1429 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) 1430 { 1431 VirtQueueElement *elem; 1432 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); 1433 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); 1434 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); 1435 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); 1436 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); 1437 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); 1438 1439 assert(sz >= sizeof(VirtQueueElement)); 1440 elem = g_malloc(out_sg_end); 1441 trace_virtqueue_alloc_element(elem, sz, in_num, out_num); 1442 elem->out_num = out_num; 1443 elem->in_num = in_num; 1444 elem->in_addr = (void *)elem + in_addr_ofs; 1445 elem->out_addr = (void *)elem + out_addr_ofs; 1446 elem->in_sg = (void *)elem + in_sg_ofs; 1447 elem->out_sg = (void *)elem + out_sg_ofs; 1448 return elem; 1449 } 1450 1451 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) 1452 { 1453 unsigned int i, head, max; 1454 VRingMemoryRegionCaches *caches; 1455 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1456 MemoryRegionCache *desc_cache; 1457 int64_t len; 1458 VirtIODevice *vdev = vq->vdev; 1459 VirtQueueElement *elem = NULL; 1460 unsigned out_num, in_num, elem_entries; 1461 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 1462 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 1463 VRingDesc desc; 1464 int rc; 1465 1466 RCU_READ_LOCK_GUARD(); 1467 if (virtio_queue_empty_rcu(vq)) { 1468 goto done; 1469 } 1470 /* Needed after virtio_queue_empty(), see comment in 1471 * virtqueue_num_heads(). */ 1472 smp_rmb(); 1473 1474 /* When we start there are none of either input nor output. */ 1475 out_num = in_num = elem_entries = 0; 1476 1477 max = vq->vring.num; 1478 1479 if (vq->inuse >= vq->vring.num) { 1480 virtio_error(vdev, "Virtqueue size exceeded"); 1481 goto done; 1482 } 1483 1484 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { 1485 goto done; 1486 } 1487 1488 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 1489 vring_set_avail_event(vq, vq->last_avail_idx); 1490 } 1491 1492 i = head; 1493 1494 caches = vring_get_region_caches(vq); 1495 if (!caches) { 1496 virtio_error(vdev, "Region caches not initialized"); 1497 goto done; 1498 } 1499 1500 if (caches->desc.len < max * sizeof(VRingDesc)) { 1501 virtio_error(vdev, "Cannot map descriptor ring"); 1502 goto done; 1503 } 1504 1505 desc_cache = &caches->desc; 1506 vring_split_desc_read(vdev, &desc, desc_cache, i); 1507 if (desc.flags & VRING_DESC_F_INDIRECT) { 1508 if (!desc.len || (desc.len % sizeof(VRingDesc))) { 1509 virtio_error(vdev, "Invalid size for indirect buffer table"); 1510 goto done; 1511 } 1512 1513 /* loop over the indirect descriptor table */ 1514 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, 1515 desc.addr, desc.len, false); 1516 desc_cache = &indirect_desc_cache; 1517 if (len < desc.len) { 1518 virtio_error(vdev, "Cannot map indirect buffer"); 1519 goto done; 1520 } 1521 1522 max = desc.len / sizeof(VRingDesc); 1523 i = 0; 1524 vring_split_desc_read(vdev, &desc, desc_cache, i); 1525 } 1526 1527 /* Collect all the descriptors */ 1528 do { 1529 bool map_ok; 1530 1531 if (desc.flags & VRING_DESC_F_WRITE) { 1532 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, 1533 iov + out_num, 1534 VIRTQUEUE_MAX_SIZE - out_num, true, 1535 desc.addr, desc.len); 1536 } else { 1537 if (in_num) { 1538 virtio_error(vdev, "Incorrect order for descriptors"); 1539 goto err_undo_map; 1540 } 1541 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, 1542 VIRTQUEUE_MAX_SIZE, false, 1543 desc.addr, desc.len); 1544 } 1545 if (!map_ok) { 1546 goto err_undo_map; 1547 } 1548 1549 /* If we've got too many, that implies a descriptor loop. */ 1550 if (++elem_entries > max) { 1551 virtio_error(vdev, "Looped descriptor"); 1552 goto err_undo_map; 1553 } 1554 1555 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); 1556 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1557 1558 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 1559 goto err_undo_map; 1560 } 1561 1562 /* Now copy what we have collected and mapped */ 1563 elem = virtqueue_alloc_element(sz, out_num, in_num); 1564 elem->index = head; 1565 elem->ndescs = 1; 1566 for (i = 0; i < out_num; i++) { 1567 elem->out_addr[i] = addr[i]; 1568 elem->out_sg[i] = iov[i]; 1569 } 1570 for (i = 0; i < in_num; i++) { 1571 elem->in_addr[i] = addr[out_num + i]; 1572 elem->in_sg[i] = iov[out_num + i]; 1573 } 1574 1575 vq->inuse++; 1576 1577 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 1578 done: 1579 address_space_cache_destroy(&indirect_desc_cache); 1580 1581 return elem; 1582 1583 err_undo_map: 1584 virtqueue_undo_map_desc(out_num, in_num, iov); 1585 goto done; 1586 } 1587 1588 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) 1589 { 1590 unsigned int i, max; 1591 VRingMemoryRegionCaches *caches; 1592 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1593 MemoryRegionCache *desc_cache; 1594 int64_t len; 1595 VirtIODevice *vdev = vq->vdev; 1596 VirtQueueElement *elem = NULL; 1597 unsigned out_num, in_num, elem_entries; 1598 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 1599 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 1600 VRingPackedDesc desc; 1601 uint16_t id; 1602 int rc; 1603 1604 RCU_READ_LOCK_GUARD(); 1605 if (virtio_queue_packed_empty_rcu(vq)) { 1606 goto done; 1607 } 1608 1609 /* When we start there are none of either input nor output. */ 1610 out_num = in_num = elem_entries = 0; 1611 1612 max = vq->vring.num; 1613 1614 if (vq->inuse >= vq->vring.num) { 1615 virtio_error(vdev, "Virtqueue size exceeded"); 1616 goto done; 1617 } 1618 1619 i = vq->last_avail_idx; 1620 1621 caches = vring_get_region_caches(vq); 1622 if (!caches) { 1623 virtio_error(vdev, "Region caches not initialized"); 1624 goto done; 1625 } 1626 1627 if (caches->desc.len < max * sizeof(VRingDesc)) { 1628 virtio_error(vdev, "Cannot map descriptor ring"); 1629 goto done; 1630 } 1631 1632 desc_cache = &caches->desc; 1633 vring_packed_desc_read(vdev, &desc, desc_cache, i, true); 1634 id = desc.id; 1635 if (desc.flags & VRING_DESC_F_INDIRECT) { 1636 if (desc.len % sizeof(VRingPackedDesc)) { 1637 virtio_error(vdev, "Invalid size for indirect buffer table"); 1638 goto done; 1639 } 1640 1641 /* loop over the indirect descriptor table */ 1642 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, 1643 desc.addr, desc.len, false); 1644 desc_cache = &indirect_desc_cache; 1645 if (len < desc.len) { 1646 virtio_error(vdev, "Cannot map indirect buffer"); 1647 goto done; 1648 } 1649 1650 max = desc.len / sizeof(VRingPackedDesc); 1651 i = 0; 1652 vring_packed_desc_read(vdev, &desc, desc_cache, i, false); 1653 } 1654 1655 /* Collect all the descriptors */ 1656 do { 1657 bool map_ok; 1658 1659 if (desc.flags & VRING_DESC_F_WRITE) { 1660 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, 1661 iov + out_num, 1662 VIRTQUEUE_MAX_SIZE - out_num, true, 1663 desc.addr, desc.len); 1664 } else { 1665 if (in_num) { 1666 virtio_error(vdev, "Incorrect order for descriptors"); 1667 goto err_undo_map; 1668 } 1669 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, 1670 VIRTQUEUE_MAX_SIZE, false, 1671 desc.addr, desc.len); 1672 } 1673 if (!map_ok) { 1674 goto err_undo_map; 1675 } 1676 1677 /* If we've got too many, that implies a descriptor loop. */ 1678 if (++elem_entries > max) { 1679 virtio_error(vdev, "Looped descriptor"); 1680 goto err_undo_map; 1681 } 1682 1683 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i, 1684 desc_cache == 1685 &indirect_desc_cache); 1686 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1687 1688 /* Now copy what we have collected and mapped */ 1689 elem = virtqueue_alloc_element(sz, out_num, in_num); 1690 for (i = 0; i < out_num; i++) { 1691 elem->out_addr[i] = addr[i]; 1692 elem->out_sg[i] = iov[i]; 1693 } 1694 for (i = 0; i < in_num; i++) { 1695 elem->in_addr[i] = addr[out_num + i]; 1696 elem->in_sg[i] = iov[out_num + i]; 1697 } 1698 1699 elem->index = id; 1700 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; 1701 vq->last_avail_idx += elem->ndescs; 1702 vq->inuse += elem->ndescs; 1703 1704 if (vq->last_avail_idx >= vq->vring.num) { 1705 vq->last_avail_idx -= vq->vring.num; 1706 vq->last_avail_wrap_counter ^= 1; 1707 } 1708 1709 vq->shadow_avail_idx = vq->last_avail_idx; 1710 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter; 1711 1712 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 1713 done: 1714 address_space_cache_destroy(&indirect_desc_cache); 1715 1716 return elem; 1717 1718 err_undo_map: 1719 virtqueue_undo_map_desc(out_num, in_num, iov); 1720 goto done; 1721 } 1722 1723 void *virtqueue_pop(VirtQueue *vq, size_t sz) 1724 { 1725 if (virtio_device_disabled(vq->vdev)) { 1726 return NULL; 1727 } 1728 1729 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 1730 return virtqueue_packed_pop(vq, sz); 1731 } else { 1732 return virtqueue_split_pop(vq, sz); 1733 } 1734 } 1735 1736 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) 1737 { 1738 VRingMemoryRegionCaches *caches; 1739 MemoryRegionCache *desc_cache; 1740 unsigned int dropped = 0; 1741 VirtQueueElement elem = {}; 1742 VirtIODevice *vdev = vq->vdev; 1743 VRingPackedDesc desc; 1744 1745 RCU_READ_LOCK_GUARD(); 1746 1747 caches = vring_get_region_caches(vq); 1748 if (!caches) { 1749 return 0; 1750 } 1751 1752 desc_cache = &caches->desc; 1753 1754 virtio_queue_set_notification(vq, 0); 1755 1756 while (vq->inuse < vq->vring.num) { 1757 unsigned int idx = vq->last_avail_idx; 1758 /* 1759 * works similar to virtqueue_pop but does not map buffers 1760 * and does not allocate any memory. 1761 */ 1762 vring_packed_desc_read(vdev, &desc, desc_cache, 1763 vq->last_avail_idx , true); 1764 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) { 1765 break; 1766 } 1767 elem.index = desc.id; 1768 elem.ndescs = 1; 1769 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache, 1770 vq->vring.num, &idx, false)) { 1771 ++elem.ndescs; 1772 } 1773 /* 1774 * immediately push the element, nothing to unmap 1775 * as both in_num and out_num are set to 0. 1776 */ 1777 virtqueue_push(vq, &elem, 0); 1778 dropped++; 1779 vq->last_avail_idx += elem.ndescs; 1780 if (vq->last_avail_idx >= vq->vring.num) { 1781 vq->last_avail_idx -= vq->vring.num; 1782 vq->last_avail_wrap_counter ^= 1; 1783 } 1784 } 1785 1786 return dropped; 1787 } 1788 1789 static unsigned int virtqueue_split_drop_all(VirtQueue *vq) 1790 { 1791 unsigned int dropped = 0; 1792 VirtQueueElement elem = {}; 1793 VirtIODevice *vdev = vq->vdev; 1794 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1795 1796 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) { 1797 /* works similar to virtqueue_pop but does not map buffers 1798 * and does not allocate any memory */ 1799 smp_rmb(); 1800 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) { 1801 break; 1802 } 1803 vq->inuse++; 1804 vq->last_avail_idx++; 1805 if (fEventIdx) { 1806 vring_set_avail_event(vq, vq->last_avail_idx); 1807 } 1808 /* immediately push the element, nothing to unmap 1809 * as both in_num and out_num are set to 0 */ 1810 virtqueue_push(vq, &elem, 0); 1811 dropped++; 1812 } 1813 1814 return dropped; 1815 } 1816 1817 /* virtqueue_drop_all: 1818 * @vq: The #VirtQueue 1819 * Drops all queued buffers and indicates them to the guest 1820 * as if they are done. Useful when buffers can not be 1821 * processed but must be returned to the guest. 1822 */ 1823 unsigned int virtqueue_drop_all(VirtQueue *vq) 1824 { 1825 struct VirtIODevice *vdev = vq->vdev; 1826 1827 if (virtio_device_disabled(vq->vdev)) { 1828 return 0; 1829 } 1830 1831 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1832 return virtqueue_packed_drop_all(vq); 1833 } else { 1834 return virtqueue_split_drop_all(vq); 1835 } 1836 } 1837 1838 /* Reading and writing a structure directly to QEMUFile is *awful*, but 1839 * it is what QEMU has always done by mistake. We can change it sooner 1840 * or later by bumping the version number of the affected vm states. 1841 * In the meanwhile, since the in-memory layout of VirtQueueElement 1842 * has changed, we need to marshal to and from the layout that was 1843 * used before the change. 1844 */ 1845 typedef struct VirtQueueElementOld { 1846 unsigned int index; 1847 unsigned int out_num; 1848 unsigned int in_num; 1849 hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; 1850 hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; 1851 struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; 1852 struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; 1853 } VirtQueueElementOld; 1854 1855 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz) 1856 { 1857 VirtQueueElement *elem; 1858 VirtQueueElementOld data; 1859 int i; 1860 1861 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 1862 1863 /* TODO: teach all callers that this can fail, and return failure instead 1864 * of asserting here. 1865 * This is just one thing (there are probably more) that must be 1866 * fixed before we can allow NDEBUG compilation. 1867 */ 1868 assert(ARRAY_SIZE(data.in_addr) >= data.in_num); 1869 assert(ARRAY_SIZE(data.out_addr) >= data.out_num); 1870 1871 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); 1872 elem->index = data.index; 1873 1874 for (i = 0; i < elem->in_num; i++) { 1875 elem->in_addr[i] = data.in_addr[i]; 1876 } 1877 1878 for (i = 0; i < elem->out_num; i++) { 1879 elem->out_addr[i] = data.out_addr[i]; 1880 } 1881 1882 for (i = 0; i < elem->in_num; i++) { 1883 /* Base is overwritten by virtqueue_map. */ 1884 elem->in_sg[i].iov_base = 0; 1885 elem->in_sg[i].iov_len = data.in_sg[i].iov_len; 1886 } 1887 1888 for (i = 0; i < elem->out_num; i++) { 1889 /* Base is overwritten by virtqueue_map. */ 1890 elem->out_sg[i].iov_base = 0; 1891 elem->out_sg[i].iov_len = data.out_sg[i].iov_len; 1892 } 1893 1894 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1895 qemu_get_be32s(f, &elem->ndescs); 1896 } 1897 1898 virtqueue_map(vdev, elem); 1899 return elem; 1900 } 1901 1902 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, 1903 VirtQueueElement *elem) 1904 { 1905 VirtQueueElementOld data; 1906 int i; 1907 1908 memset(&data, 0, sizeof(data)); 1909 data.index = elem->index; 1910 data.in_num = elem->in_num; 1911 data.out_num = elem->out_num; 1912 1913 for (i = 0; i < elem->in_num; i++) { 1914 data.in_addr[i] = elem->in_addr[i]; 1915 } 1916 1917 for (i = 0; i < elem->out_num; i++) { 1918 data.out_addr[i] = elem->out_addr[i]; 1919 } 1920 1921 for (i = 0; i < elem->in_num; i++) { 1922 /* Base is overwritten by virtqueue_map when loading. Do not 1923 * save it, as it would leak the QEMU address space layout. */ 1924 data.in_sg[i].iov_len = elem->in_sg[i].iov_len; 1925 } 1926 1927 for (i = 0; i < elem->out_num; i++) { 1928 /* Do not save iov_base as above. */ 1929 data.out_sg[i].iov_len = elem->out_sg[i].iov_len; 1930 } 1931 1932 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1933 qemu_put_be32s(f, &elem->ndescs); 1934 } 1935 1936 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 1937 } 1938 1939 /* virtio device */ 1940 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) 1941 { 1942 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1943 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1944 1945 if (virtio_device_disabled(vdev)) { 1946 return; 1947 } 1948 1949 if (k->notify) { 1950 k->notify(qbus->parent, vector); 1951 } 1952 } 1953 1954 void virtio_update_irq(VirtIODevice *vdev) 1955 { 1956 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 1957 } 1958 1959 static int virtio_validate_features(VirtIODevice *vdev) 1960 { 1961 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1962 1963 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) && 1964 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { 1965 return -EFAULT; 1966 } 1967 1968 if (k->validate_features) { 1969 return k->validate_features(vdev); 1970 } else { 1971 return 0; 1972 } 1973 } 1974 1975 int virtio_set_status(VirtIODevice *vdev, uint8_t val) 1976 { 1977 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1978 trace_virtio_set_status(vdev, val); 1979 1980 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1981 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && 1982 val & VIRTIO_CONFIG_S_FEATURES_OK) { 1983 int ret = virtio_validate_features(vdev); 1984 1985 if (ret) { 1986 return ret; 1987 } 1988 } 1989 } 1990 1991 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 1992 (val & VIRTIO_CONFIG_S_DRIVER_OK)) { 1993 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); 1994 } 1995 1996 if (k->set_status) { 1997 k->set_status(vdev, val); 1998 } 1999 vdev->status = val; 2000 2001 return 0; 2002 } 2003 2004 static enum virtio_device_endian virtio_default_endian(void) 2005 { 2006 if (target_words_bigendian()) { 2007 return VIRTIO_DEVICE_ENDIAN_BIG; 2008 } else { 2009 return VIRTIO_DEVICE_ENDIAN_LITTLE; 2010 } 2011 } 2012 2013 static enum virtio_device_endian virtio_current_cpu_endian(void) 2014 { 2015 if (cpu_virtio_is_big_endian(current_cpu)) { 2016 return VIRTIO_DEVICE_ENDIAN_BIG; 2017 } else { 2018 return VIRTIO_DEVICE_ENDIAN_LITTLE; 2019 } 2020 } 2021 2022 void virtio_reset(void *opaque) 2023 { 2024 VirtIODevice *vdev = opaque; 2025 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2026 int i; 2027 2028 virtio_set_status(vdev, 0); 2029 if (current_cpu) { 2030 /* Guest initiated reset */ 2031 vdev->device_endian = virtio_current_cpu_endian(); 2032 } else { 2033 /* System reset */ 2034 vdev->device_endian = virtio_default_endian(); 2035 } 2036 2037 if (k->reset) { 2038 k->reset(vdev); 2039 } 2040 2041 vdev->start_on_kick = false; 2042 vdev->started = false; 2043 vdev->broken = false; 2044 vdev->guest_features = 0; 2045 vdev->queue_sel = 0; 2046 vdev->status = 0; 2047 vdev->disabled = false; 2048 qatomic_set(&vdev->isr, 0); 2049 vdev->config_vector = VIRTIO_NO_VECTOR; 2050 virtio_notify_vector(vdev, vdev->config_vector); 2051 2052 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2053 vdev->vq[i].vring.desc = 0; 2054 vdev->vq[i].vring.avail = 0; 2055 vdev->vq[i].vring.used = 0; 2056 vdev->vq[i].last_avail_idx = 0; 2057 vdev->vq[i].shadow_avail_idx = 0; 2058 vdev->vq[i].used_idx = 0; 2059 vdev->vq[i].last_avail_wrap_counter = true; 2060 vdev->vq[i].shadow_avail_wrap_counter = true; 2061 vdev->vq[i].used_wrap_counter = true; 2062 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); 2063 vdev->vq[i].signalled_used = 0; 2064 vdev->vq[i].signalled_used_valid = false; 2065 vdev->vq[i].notification = true; 2066 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; 2067 vdev->vq[i].inuse = 0; 2068 virtio_virtqueue_reset_region_cache(&vdev->vq[i]); 2069 } 2070 } 2071 2072 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) 2073 { 2074 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2075 uint8_t val; 2076 2077 if (addr + sizeof(val) > vdev->config_len) { 2078 return (uint32_t)-1; 2079 } 2080 2081 k->get_config(vdev, vdev->config); 2082 2083 val = ldub_p(vdev->config + addr); 2084 return val; 2085 } 2086 2087 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) 2088 { 2089 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2090 uint16_t val; 2091 2092 if (addr + sizeof(val) > vdev->config_len) { 2093 return (uint32_t)-1; 2094 } 2095 2096 k->get_config(vdev, vdev->config); 2097 2098 val = lduw_p(vdev->config + addr); 2099 return val; 2100 } 2101 2102 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) 2103 { 2104 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2105 uint32_t val; 2106 2107 if (addr + sizeof(val) > vdev->config_len) { 2108 return (uint32_t)-1; 2109 } 2110 2111 k->get_config(vdev, vdev->config); 2112 2113 val = ldl_p(vdev->config + addr); 2114 return val; 2115 } 2116 2117 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) 2118 { 2119 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2120 uint8_t val = data; 2121 2122 if (addr + sizeof(val) > vdev->config_len) { 2123 return; 2124 } 2125 2126 stb_p(vdev->config + addr, val); 2127 2128 if (k->set_config) { 2129 k->set_config(vdev, vdev->config); 2130 } 2131 } 2132 2133 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) 2134 { 2135 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2136 uint16_t val = data; 2137 2138 if (addr + sizeof(val) > vdev->config_len) { 2139 return; 2140 } 2141 2142 stw_p(vdev->config + addr, val); 2143 2144 if (k->set_config) { 2145 k->set_config(vdev, vdev->config); 2146 } 2147 } 2148 2149 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) 2150 { 2151 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2152 uint32_t val = data; 2153 2154 if (addr + sizeof(val) > vdev->config_len) { 2155 return; 2156 } 2157 2158 stl_p(vdev->config + addr, val); 2159 2160 if (k->set_config) { 2161 k->set_config(vdev, vdev->config); 2162 } 2163 } 2164 2165 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr) 2166 { 2167 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2168 uint8_t val; 2169 2170 if (addr + sizeof(val) > vdev->config_len) { 2171 return (uint32_t)-1; 2172 } 2173 2174 k->get_config(vdev, vdev->config); 2175 2176 val = ldub_p(vdev->config + addr); 2177 return val; 2178 } 2179 2180 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr) 2181 { 2182 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2183 uint16_t val; 2184 2185 if (addr + sizeof(val) > vdev->config_len) { 2186 return (uint32_t)-1; 2187 } 2188 2189 k->get_config(vdev, vdev->config); 2190 2191 val = lduw_le_p(vdev->config + addr); 2192 return val; 2193 } 2194 2195 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr) 2196 { 2197 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2198 uint32_t val; 2199 2200 if (addr + sizeof(val) > vdev->config_len) { 2201 return (uint32_t)-1; 2202 } 2203 2204 k->get_config(vdev, vdev->config); 2205 2206 val = ldl_le_p(vdev->config + addr); 2207 return val; 2208 } 2209 2210 void virtio_config_modern_writeb(VirtIODevice *vdev, 2211 uint32_t addr, uint32_t data) 2212 { 2213 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2214 uint8_t val = data; 2215 2216 if (addr + sizeof(val) > vdev->config_len) { 2217 return; 2218 } 2219 2220 stb_p(vdev->config + addr, val); 2221 2222 if (k->set_config) { 2223 k->set_config(vdev, vdev->config); 2224 } 2225 } 2226 2227 void virtio_config_modern_writew(VirtIODevice *vdev, 2228 uint32_t addr, uint32_t data) 2229 { 2230 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2231 uint16_t val = data; 2232 2233 if (addr + sizeof(val) > vdev->config_len) { 2234 return; 2235 } 2236 2237 stw_le_p(vdev->config + addr, val); 2238 2239 if (k->set_config) { 2240 k->set_config(vdev, vdev->config); 2241 } 2242 } 2243 2244 void virtio_config_modern_writel(VirtIODevice *vdev, 2245 uint32_t addr, uint32_t data) 2246 { 2247 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2248 uint32_t val = data; 2249 2250 if (addr + sizeof(val) > vdev->config_len) { 2251 return; 2252 } 2253 2254 stl_le_p(vdev->config + addr, val); 2255 2256 if (k->set_config) { 2257 k->set_config(vdev, vdev->config); 2258 } 2259 } 2260 2261 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) 2262 { 2263 if (!vdev->vq[n].vring.num) { 2264 return; 2265 } 2266 vdev->vq[n].vring.desc = addr; 2267 virtio_queue_update_rings(vdev, n); 2268 } 2269 2270 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) 2271 { 2272 return vdev->vq[n].vring.desc; 2273 } 2274 2275 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, 2276 hwaddr avail, hwaddr used) 2277 { 2278 if (!vdev->vq[n].vring.num) { 2279 return; 2280 } 2281 vdev->vq[n].vring.desc = desc; 2282 vdev->vq[n].vring.avail = avail; 2283 vdev->vq[n].vring.used = used; 2284 virtio_init_region_cache(vdev, n); 2285 } 2286 2287 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) 2288 { 2289 /* Don't allow guest to flip queue between existent and 2290 * nonexistent states, or to set it to an invalid size. 2291 */ 2292 if (!!num != !!vdev->vq[n].vring.num || 2293 num > VIRTQUEUE_MAX_SIZE || 2294 num < 0) { 2295 return; 2296 } 2297 vdev->vq[n].vring.num = num; 2298 } 2299 2300 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector) 2301 { 2302 return QLIST_FIRST(&vdev->vector_queues[vector]); 2303 } 2304 2305 VirtQueue *virtio_vector_next_queue(VirtQueue *vq) 2306 { 2307 return QLIST_NEXT(vq, node); 2308 } 2309 2310 int virtio_queue_get_num(VirtIODevice *vdev, int n) 2311 { 2312 return vdev->vq[n].vring.num; 2313 } 2314 2315 int virtio_queue_get_max_num(VirtIODevice *vdev, int n) 2316 { 2317 return vdev->vq[n].vring.num_default; 2318 } 2319 2320 int virtio_get_num_queues(VirtIODevice *vdev) 2321 { 2322 int i; 2323 2324 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2325 if (!virtio_queue_get_num(vdev, i)) { 2326 break; 2327 } 2328 } 2329 2330 return i; 2331 } 2332 2333 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) 2334 { 2335 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2336 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2337 2338 /* virtio-1 compliant devices cannot change the alignment */ 2339 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2340 error_report("tried to modify queue alignment for virtio-1 device"); 2341 return; 2342 } 2343 /* Check that the transport told us it was going to do this 2344 * (so a buggy transport will immediately assert rather than 2345 * silently failing to migrate this state) 2346 */ 2347 assert(k->has_variable_vring_alignment); 2348 2349 if (align) { 2350 vdev->vq[n].vring.align = align; 2351 virtio_queue_update_rings(vdev, n); 2352 } 2353 } 2354 2355 static void virtio_queue_notify_vq(VirtQueue *vq) 2356 { 2357 if (vq->vring.desc && vq->handle_output) { 2358 VirtIODevice *vdev = vq->vdev; 2359 2360 if (unlikely(vdev->broken)) { 2361 return; 2362 } 2363 2364 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 2365 vq->handle_output(vdev, vq); 2366 2367 if (unlikely(vdev->start_on_kick)) { 2368 virtio_set_started(vdev, true); 2369 } 2370 } 2371 } 2372 2373 void virtio_queue_notify(VirtIODevice *vdev, int n) 2374 { 2375 VirtQueue *vq = &vdev->vq[n]; 2376 2377 if (unlikely(!vq->vring.desc || vdev->broken)) { 2378 return; 2379 } 2380 2381 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 2382 if (vq->host_notifier_enabled) { 2383 event_notifier_set(&vq->host_notifier); 2384 } else if (vq->handle_output) { 2385 vq->handle_output(vdev, vq); 2386 2387 if (unlikely(vdev->start_on_kick)) { 2388 virtio_set_started(vdev, true); 2389 } 2390 } 2391 } 2392 2393 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) 2394 { 2395 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector : 2396 VIRTIO_NO_VECTOR; 2397 } 2398 2399 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) 2400 { 2401 VirtQueue *vq = &vdev->vq[n]; 2402 2403 if (n < VIRTIO_QUEUE_MAX) { 2404 if (vdev->vector_queues && 2405 vdev->vq[n].vector != VIRTIO_NO_VECTOR) { 2406 QLIST_REMOVE(vq, node); 2407 } 2408 vdev->vq[n].vector = vector; 2409 if (vdev->vector_queues && 2410 vector != VIRTIO_NO_VECTOR) { 2411 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node); 2412 } 2413 } 2414 } 2415 2416 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, 2417 VirtIOHandleOutput handle_output) 2418 { 2419 int i; 2420 2421 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2422 if (vdev->vq[i].vring.num == 0) 2423 break; 2424 } 2425 2426 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) 2427 abort(); 2428 2429 vdev->vq[i].vring.num = queue_size; 2430 vdev->vq[i].vring.num_default = queue_size; 2431 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; 2432 vdev->vq[i].handle_output = handle_output; 2433 vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size); 2434 2435 return &vdev->vq[i]; 2436 } 2437 2438 void virtio_delete_queue(VirtQueue *vq) 2439 { 2440 vq->vring.num = 0; 2441 vq->vring.num_default = 0; 2442 vq->handle_output = NULL; 2443 g_free(vq->used_elems); 2444 vq->used_elems = NULL; 2445 virtio_virtqueue_reset_region_cache(vq); 2446 } 2447 2448 void virtio_del_queue(VirtIODevice *vdev, int n) 2449 { 2450 if (n < 0 || n >= VIRTIO_QUEUE_MAX) { 2451 abort(); 2452 } 2453 2454 virtio_delete_queue(&vdev->vq[n]); 2455 } 2456 2457 static void virtio_set_isr(VirtIODevice *vdev, int value) 2458 { 2459 uint8_t old = qatomic_read(&vdev->isr); 2460 2461 /* Do not write ISR if it does not change, so that its cacheline remains 2462 * shared in the common case where the guest does not read it. 2463 */ 2464 if ((old & value) != value) { 2465 qatomic_or(&vdev->isr, value); 2466 } 2467 } 2468 2469 /* Called within rcu_read_lock(). */ 2470 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2471 { 2472 uint16_t old, new; 2473 bool v; 2474 /* We need to expose used array entries before checking used event. */ 2475 smp_mb(); 2476 /* Always notify when queue is empty (when feature acknowledge) */ 2477 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && 2478 !vq->inuse && virtio_queue_empty(vq)) { 2479 return true; 2480 } 2481 2482 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 2483 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); 2484 } 2485 2486 v = vq->signalled_used_valid; 2487 vq->signalled_used_valid = true; 2488 old = vq->signalled_used; 2489 new = vq->signalled_used = vq->used_idx; 2490 return !v || vring_need_event(vring_get_used_event(vq), new, old); 2491 } 2492 2493 static bool vring_packed_need_event(VirtQueue *vq, bool wrap, 2494 uint16_t off_wrap, uint16_t new, 2495 uint16_t old) 2496 { 2497 int off = off_wrap & ~(1 << 15); 2498 2499 if (wrap != off_wrap >> 15) { 2500 off -= vq->vring.num; 2501 } 2502 2503 return vring_need_event(off, new, old); 2504 } 2505 2506 /* Called within rcu_read_lock(). */ 2507 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2508 { 2509 VRingPackedDescEvent e; 2510 uint16_t old, new; 2511 bool v; 2512 VRingMemoryRegionCaches *caches; 2513 2514 caches = vring_get_region_caches(vq); 2515 if (!caches) { 2516 return false; 2517 } 2518 2519 vring_packed_event_read(vdev, &caches->avail, &e); 2520 2521 old = vq->signalled_used; 2522 new = vq->signalled_used = vq->used_idx; 2523 v = vq->signalled_used_valid; 2524 vq->signalled_used_valid = true; 2525 2526 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) { 2527 return false; 2528 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) { 2529 return true; 2530 } 2531 2532 return !v || vring_packed_need_event(vq, vq->used_wrap_counter, 2533 e.off_wrap, new, old); 2534 } 2535 2536 /* Called within rcu_read_lock(). */ 2537 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2538 { 2539 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 2540 return virtio_packed_should_notify(vdev, vq); 2541 } else { 2542 return virtio_split_should_notify(vdev, vq); 2543 } 2544 } 2545 2546 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) 2547 { 2548 WITH_RCU_READ_LOCK_GUARD() { 2549 if (!virtio_should_notify(vdev, vq)) { 2550 return; 2551 } 2552 } 2553 2554 trace_virtio_notify_irqfd(vdev, vq); 2555 2556 /* 2557 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but 2558 * windows drivers included in virtio-win 1.8.0 (circa 2015) are 2559 * incorrectly polling this bit during crashdump and hibernation 2560 * in MSI mode, causing a hang if this bit is never updated. 2561 * Recent releases of Windows do not really shut down, but rather 2562 * log out and hibernate to make the next startup faster. Hence, 2563 * this manifested as a more serious hang during shutdown with 2564 * 2565 * Next driver release from 2016 fixed this problem, so working around it 2566 * is not a must, but it's easy to do so let's do it here. 2567 * 2568 * Note: it's safe to update ISR from any thread as it was switched 2569 * to an atomic operation. 2570 */ 2571 virtio_set_isr(vq->vdev, 0x1); 2572 event_notifier_set(&vq->guest_notifier); 2573 } 2574 2575 static void virtio_irq(VirtQueue *vq) 2576 { 2577 virtio_set_isr(vq->vdev, 0x1); 2578 virtio_notify_vector(vq->vdev, vq->vector); 2579 } 2580 2581 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) 2582 { 2583 WITH_RCU_READ_LOCK_GUARD() { 2584 if (!virtio_should_notify(vdev, vq)) { 2585 return; 2586 } 2587 } 2588 2589 trace_virtio_notify(vdev, vq); 2590 virtio_irq(vq); 2591 } 2592 2593 void virtio_notify_config(VirtIODevice *vdev) 2594 { 2595 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2596 return; 2597 2598 virtio_set_isr(vdev, 0x3); 2599 vdev->generation++; 2600 virtio_notify_vector(vdev, vdev->config_vector); 2601 } 2602 2603 static bool virtio_device_endian_needed(void *opaque) 2604 { 2605 VirtIODevice *vdev = opaque; 2606 2607 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); 2608 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2609 return vdev->device_endian != virtio_default_endian(); 2610 } 2611 /* Devices conforming to VIRTIO 1.0 or later are always LE. */ 2612 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; 2613 } 2614 2615 static bool virtio_64bit_features_needed(void *opaque) 2616 { 2617 VirtIODevice *vdev = opaque; 2618 2619 return (vdev->host_features >> 32) != 0; 2620 } 2621 2622 static bool virtio_virtqueue_needed(void *opaque) 2623 { 2624 VirtIODevice *vdev = opaque; 2625 2626 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); 2627 } 2628 2629 static bool virtio_packed_virtqueue_needed(void *opaque) 2630 { 2631 VirtIODevice *vdev = opaque; 2632 2633 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); 2634 } 2635 2636 static bool virtio_ringsize_needed(void *opaque) 2637 { 2638 VirtIODevice *vdev = opaque; 2639 int i; 2640 2641 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2642 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) { 2643 return true; 2644 } 2645 } 2646 return false; 2647 } 2648 2649 static bool virtio_extra_state_needed(void *opaque) 2650 { 2651 VirtIODevice *vdev = opaque; 2652 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2653 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2654 2655 return k->has_extra_state && 2656 k->has_extra_state(qbus->parent); 2657 } 2658 2659 static bool virtio_broken_needed(void *opaque) 2660 { 2661 VirtIODevice *vdev = opaque; 2662 2663 return vdev->broken; 2664 } 2665 2666 static bool virtio_started_needed(void *opaque) 2667 { 2668 VirtIODevice *vdev = opaque; 2669 2670 return vdev->started; 2671 } 2672 2673 static bool virtio_disabled_needed(void *opaque) 2674 { 2675 VirtIODevice *vdev = opaque; 2676 2677 return vdev->disabled; 2678 } 2679 2680 static const VMStateDescription vmstate_virtqueue = { 2681 .name = "virtqueue_state", 2682 .version_id = 1, 2683 .minimum_version_id = 1, 2684 .fields = (VMStateField[]) { 2685 VMSTATE_UINT64(vring.avail, struct VirtQueue), 2686 VMSTATE_UINT64(vring.used, struct VirtQueue), 2687 VMSTATE_END_OF_LIST() 2688 } 2689 }; 2690 2691 static const VMStateDescription vmstate_packed_virtqueue = { 2692 .name = "packed_virtqueue_state", 2693 .version_id = 1, 2694 .minimum_version_id = 1, 2695 .fields = (VMStateField[]) { 2696 VMSTATE_UINT16(last_avail_idx, struct VirtQueue), 2697 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue), 2698 VMSTATE_UINT16(used_idx, struct VirtQueue), 2699 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue), 2700 VMSTATE_UINT32(inuse, struct VirtQueue), 2701 VMSTATE_END_OF_LIST() 2702 } 2703 }; 2704 2705 static const VMStateDescription vmstate_virtio_virtqueues = { 2706 .name = "virtio/virtqueues", 2707 .version_id = 1, 2708 .minimum_version_id = 1, 2709 .needed = &virtio_virtqueue_needed, 2710 .fields = (VMStateField[]) { 2711 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2712 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), 2713 VMSTATE_END_OF_LIST() 2714 } 2715 }; 2716 2717 static const VMStateDescription vmstate_virtio_packed_virtqueues = { 2718 .name = "virtio/packed_virtqueues", 2719 .version_id = 1, 2720 .minimum_version_id = 1, 2721 .needed = &virtio_packed_virtqueue_needed, 2722 .fields = (VMStateField[]) { 2723 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2724 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), 2725 VMSTATE_END_OF_LIST() 2726 } 2727 }; 2728 2729 static const VMStateDescription vmstate_ringsize = { 2730 .name = "ringsize_state", 2731 .version_id = 1, 2732 .minimum_version_id = 1, 2733 .fields = (VMStateField[]) { 2734 VMSTATE_UINT32(vring.num_default, struct VirtQueue), 2735 VMSTATE_END_OF_LIST() 2736 } 2737 }; 2738 2739 static const VMStateDescription vmstate_virtio_ringsize = { 2740 .name = "virtio/ringsize", 2741 .version_id = 1, 2742 .minimum_version_id = 1, 2743 .needed = &virtio_ringsize_needed, 2744 .fields = (VMStateField[]) { 2745 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2746 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), 2747 VMSTATE_END_OF_LIST() 2748 } 2749 }; 2750 2751 static int get_extra_state(QEMUFile *f, void *pv, size_t size, 2752 const VMStateField *field) 2753 { 2754 VirtIODevice *vdev = pv; 2755 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2756 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2757 2758 if (!k->load_extra_state) { 2759 return -1; 2760 } else { 2761 return k->load_extra_state(qbus->parent, f); 2762 } 2763 } 2764 2765 static int put_extra_state(QEMUFile *f, void *pv, size_t size, 2766 const VMStateField *field, JSONWriter *vmdesc) 2767 { 2768 VirtIODevice *vdev = pv; 2769 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2770 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2771 2772 k->save_extra_state(qbus->parent, f); 2773 return 0; 2774 } 2775 2776 static const VMStateInfo vmstate_info_extra_state = { 2777 .name = "virtqueue_extra_state", 2778 .get = get_extra_state, 2779 .put = put_extra_state, 2780 }; 2781 2782 static const VMStateDescription vmstate_virtio_extra_state = { 2783 .name = "virtio/extra_state", 2784 .version_id = 1, 2785 .minimum_version_id = 1, 2786 .needed = &virtio_extra_state_needed, 2787 .fields = (VMStateField[]) { 2788 { 2789 .name = "extra_state", 2790 .version_id = 0, 2791 .field_exists = NULL, 2792 .size = 0, 2793 .info = &vmstate_info_extra_state, 2794 .flags = VMS_SINGLE, 2795 .offset = 0, 2796 }, 2797 VMSTATE_END_OF_LIST() 2798 } 2799 }; 2800 2801 static const VMStateDescription vmstate_virtio_device_endian = { 2802 .name = "virtio/device_endian", 2803 .version_id = 1, 2804 .minimum_version_id = 1, 2805 .needed = &virtio_device_endian_needed, 2806 .fields = (VMStateField[]) { 2807 VMSTATE_UINT8(device_endian, VirtIODevice), 2808 VMSTATE_END_OF_LIST() 2809 } 2810 }; 2811 2812 static const VMStateDescription vmstate_virtio_64bit_features = { 2813 .name = "virtio/64bit_features", 2814 .version_id = 1, 2815 .minimum_version_id = 1, 2816 .needed = &virtio_64bit_features_needed, 2817 .fields = (VMStateField[]) { 2818 VMSTATE_UINT64(guest_features, VirtIODevice), 2819 VMSTATE_END_OF_LIST() 2820 } 2821 }; 2822 2823 static const VMStateDescription vmstate_virtio_broken = { 2824 .name = "virtio/broken", 2825 .version_id = 1, 2826 .minimum_version_id = 1, 2827 .needed = &virtio_broken_needed, 2828 .fields = (VMStateField[]) { 2829 VMSTATE_BOOL(broken, VirtIODevice), 2830 VMSTATE_END_OF_LIST() 2831 } 2832 }; 2833 2834 static const VMStateDescription vmstate_virtio_started = { 2835 .name = "virtio/started", 2836 .version_id = 1, 2837 .minimum_version_id = 1, 2838 .needed = &virtio_started_needed, 2839 .fields = (VMStateField[]) { 2840 VMSTATE_BOOL(started, VirtIODevice), 2841 VMSTATE_END_OF_LIST() 2842 } 2843 }; 2844 2845 static const VMStateDescription vmstate_virtio_disabled = { 2846 .name = "virtio/disabled", 2847 .version_id = 1, 2848 .minimum_version_id = 1, 2849 .needed = &virtio_disabled_needed, 2850 .fields = (VMStateField[]) { 2851 VMSTATE_BOOL(disabled, VirtIODevice), 2852 VMSTATE_END_OF_LIST() 2853 } 2854 }; 2855 2856 static const VMStateDescription vmstate_virtio = { 2857 .name = "virtio", 2858 .version_id = 1, 2859 .minimum_version_id = 1, 2860 .fields = (VMStateField[]) { 2861 VMSTATE_END_OF_LIST() 2862 }, 2863 .subsections = (const VMStateDescription*[]) { 2864 &vmstate_virtio_device_endian, 2865 &vmstate_virtio_64bit_features, 2866 &vmstate_virtio_virtqueues, 2867 &vmstate_virtio_ringsize, 2868 &vmstate_virtio_broken, 2869 &vmstate_virtio_extra_state, 2870 &vmstate_virtio_started, 2871 &vmstate_virtio_packed_virtqueues, 2872 &vmstate_virtio_disabled, 2873 NULL 2874 } 2875 }; 2876 2877 int virtio_save(VirtIODevice *vdev, QEMUFile *f) 2878 { 2879 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2880 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2881 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 2882 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); 2883 int i; 2884 2885 if (k->save_config) { 2886 k->save_config(qbus->parent, f); 2887 } 2888 2889 qemu_put_8s(f, &vdev->status); 2890 qemu_put_8s(f, &vdev->isr); 2891 qemu_put_be16s(f, &vdev->queue_sel); 2892 qemu_put_be32s(f, &guest_features_lo); 2893 qemu_put_be32(f, vdev->config_len); 2894 qemu_put_buffer(f, vdev->config, vdev->config_len); 2895 2896 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2897 if (vdev->vq[i].vring.num == 0) 2898 break; 2899 } 2900 2901 qemu_put_be32(f, i); 2902 2903 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2904 if (vdev->vq[i].vring.num == 0) 2905 break; 2906 2907 qemu_put_be32(f, vdev->vq[i].vring.num); 2908 if (k->has_variable_vring_alignment) { 2909 qemu_put_be32(f, vdev->vq[i].vring.align); 2910 } 2911 /* 2912 * Save desc now, the rest of the ring addresses are saved in 2913 * subsections for VIRTIO-1 devices. 2914 */ 2915 qemu_put_be64(f, vdev->vq[i].vring.desc); 2916 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); 2917 if (k->save_queue) { 2918 k->save_queue(qbus->parent, i, f); 2919 } 2920 } 2921 2922 if (vdc->save != NULL) { 2923 vdc->save(vdev, f); 2924 } 2925 2926 if (vdc->vmsd) { 2927 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL); 2928 if (ret) { 2929 return ret; 2930 } 2931 } 2932 2933 /* Subsections */ 2934 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL); 2935 } 2936 2937 /* A wrapper for use as a VMState .put function */ 2938 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, 2939 const VMStateField *field, JSONWriter *vmdesc) 2940 { 2941 return virtio_save(VIRTIO_DEVICE(opaque), f); 2942 } 2943 2944 /* A wrapper for use as a VMState .get function */ 2945 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size, 2946 const VMStateField *field) 2947 { 2948 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 2949 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); 2950 2951 return virtio_load(vdev, f, dc->vmsd->version_id); 2952 } 2953 2954 const VMStateInfo virtio_vmstate_info = { 2955 .name = "virtio", 2956 .get = virtio_device_get, 2957 .put = virtio_device_put, 2958 }; 2959 2960 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) 2961 { 2962 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2963 bool bad = (val & ~(vdev->host_features)) != 0; 2964 2965 val &= vdev->host_features; 2966 if (k->set_features) { 2967 k->set_features(vdev, val); 2968 } 2969 vdev->guest_features = val; 2970 return bad ? -1 : 0; 2971 } 2972 2973 int virtio_set_features(VirtIODevice *vdev, uint64_t val) 2974 { 2975 int ret; 2976 /* 2977 * The driver must not attempt to set features after feature negotiation 2978 * has finished. 2979 */ 2980 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { 2981 return -EINVAL; 2982 } 2983 2984 if (val & (1ull << VIRTIO_F_BAD_FEATURE)) { 2985 qemu_log_mask(LOG_GUEST_ERROR, 2986 "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n", 2987 __func__, vdev->name); 2988 } 2989 2990 ret = virtio_set_features_nocheck(vdev, val); 2991 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 2992 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ 2993 int i; 2994 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2995 if (vdev->vq[i].vring.num != 0) { 2996 virtio_init_region_cache(vdev, i); 2997 } 2998 } 2999 } 3000 if (!ret) { 3001 if (!virtio_device_started(vdev, vdev->status) && 3002 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3003 vdev->start_on_kick = true; 3004 } 3005 } 3006 return ret; 3007 } 3008 3009 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes, 3010 uint64_t host_features) 3011 { 3012 size_t config_size = 0; 3013 int i; 3014 3015 for (i = 0; feature_sizes[i].flags != 0; i++) { 3016 if (host_features & feature_sizes[i].flags) { 3017 config_size = MAX(feature_sizes[i].end, config_size); 3018 } 3019 } 3020 3021 return config_size; 3022 } 3023 3024 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) 3025 { 3026 int i, ret; 3027 int32_t config_len; 3028 uint32_t num; 3029 uint32_t features; 3030 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3031 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3032 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 3033 3034 /* 3035 * We poison the endianness to ensure it does not get used before 3036 * subsections have been loaded. 3037 */ 3038 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN; 3039 3040 if (k->load_config) { 3041 ret = k->load_config(qbus->parent, f); 3042 if (ret) 3043 return ret; 3044 } 3045 3046 qemu_get_8s(f, &vdev->status); 3047 qemu_get_8s(f, &vdev->isr); 3048 qemu_get_be16s(f, &vdev->queue_sel); 3049 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) { 3050 return -1; 3051 } 3052 qemu_get_be32s(f, &features); 3053 3054 /* 3055 * Temporarily set guest_features low bits - needed by 3056 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 3057 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ. 3058 * 3059 * Note: devices should always test host features in future - don't create 3060 * new dependencies like this. 3061 */ 3062 vdev->guest_features = features; 3063 3064 config_len = qemu_get_be32(f); 3065 3066 /* 3067 * There are cases where the incoming config can be bigger or smaller 3068 * than what we have; so load what we have space for, and skip 3069 * any excess that's in the stream. 3070 */ 3071 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len)); 3072 3073 while (config_len > vdev->config_len) { 3074 qemu_get_byte(f); 3075 config_len--; 3076 } 3077 3078 num = qemu_get_be32(f); 3079 3080 if (num > VIRTIO_QUEUE_MAX) { 3081 error_report("Invalid number of virtqueues: 0x%x", num); 3082 return -1; 3083 } 3084 3085 for (i = 0; i < num; i++) { 3086 vdev->vq[i].vring.num = qemu_get_be32(f); 3087 if (k->has_variable_vring_alignment) { 3088 vdev->vq[i].vring.align = qemu_get_be32(f); 3089 } 3090 vdev->vq[i].vring.desc = qemu_get_be64(f); 3091 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); 3092 vdev->vq[i].signalled_used_valid = false; 3093 vdev->vq[i].notification = true; 3094 3095 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) { 3096 error_report("VQ %d address 0x0 " 3097 "inconsistent with Host index 0x%x", 3098 i, vdev->vq[i].last_avail_idx); 3099 return -1; 3100 } 3101 if (k->load_queue) { 3102 ret = k->load_queue(qbus->parent, i, f); 3103 if (ret) 3104 return ret; 3105 } 3106 } 3107 3108 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 3109 3110 if (vdc->load != NULL) { 3111 ret = vdc->load(vdev, f, version_id); 3112 if (ret) { 3113 return ret; 3114 } 3115 } 3116 3117 if (vdc->vmsd) { 3118 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); 3119 if (ret) { 3120 return ret; 3121 } 3122 } 3123 3124 /* Subsections */ 3125 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); 3126 if (ret) { 3127 return ret; 3128 } 3129 3130 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) { 3131 vdev->device_endian = virtio_default_endian(); 3132 } 3133 3134 if (virtio_64bit_features_needed(vdev)) { 3135 /* 3136 * Subsection load filled vdev->guest_features. Run them 3137 * through virtio_set_features to sanity-check them against 3138 * host_features. 3139 */ 3140 uint64_t features64 = vdev->guest_features; 3141 if (virtio_set_features_nocheck(vdev, features64) < 0) { 3142 error_report("Features 0x%" PRIx64 " unsupported. " 3143 "Allowed features: 0x%" PRIx64, 3144 features64, vdev->host_features); 3145 return -1; 3146 } 3147 } else { 3148 if (virtio_set_features_nocheck(vdev, features) < 0) { 3149 error_report("Features 0x%x unsupported. " 3150 "Allowed features: 0x%" PRIx64, 3151 features, vdev->host_features); 3152 return -1; 3153 } 3154 } 3155 3156 if (!virtio_device_started(vdev, vdev->status) && 3157 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3158 vdev->start_on_kick = true; 3159 } 3160 3161 RCU_READ_LOCK_GUARD(); 3162 for (i = 0; i < num; i++) { 3163 if (vdev->vq[i].vring.desc) { 3164 uint16_t nheads; 3165 3166 /* 3167 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so 3168 * only the region cache needs to be set up. Legacy devices need 3169 * to calculate used and avail ring addresses based on the desc 3170 * address. 3171 */ 3172 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3173 virtio_init_region_cache(vdev, i); 3174 } else { 3175 virtio_queue_update_rings(vdev, i); 3176 } 3177 3178 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3179 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx; 3180 vdev->vq[i].shadow_avail_wrap_counter = 3181 vdev->vq[i].last_avail_wrap_counter; 3182 continue; 3183 } 3184 3185 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; 3186 /* Check it isn't doing strange things with descriptor numbers. */ 3187 if (nheads > vdev->vq[i].vring.num) { 3188 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x " 3189 "inconsistent with Host index 0x%x: delta 0x%x", 3190 i, vdev->vq[i].vring.num, 3191 vring_avail_idx(&vdev->vq[i]), 3192 vdev->vq[i].last_avail_idx, nheads); 3193 vdev->vq[i].used_idx = 0; 3194 vdev->vq[i].shadow_avail_idx = 0; 3195 vdev->vq[i].inuse = 0; 3196 continue; 3197 } 3198 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); 3199 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); 3200 3201 /* 3202 * Some devices migrate VirtQueueElements that have been popped 3203 * from the avail ring but not yet returned to the used ring. 3204 * Since max ring size < UINT16_MAX it's safe to use modulo 3205 * UINT16_MAX + 1 subtraction. 3206 */ 3207 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx - 3208 vdev->vq[i].used_idx); 3209 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) { 3210 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - " 3211 "used_idx 0x%x", 3212 i, vdev->vq[i].vring.num, 3213 vdev->vq[i].last_avail_idx, 3214 vdev->vq[i].used_idx); 3215 return -1; 3216 } 3217 } 3218 } 3219 3220 if (vdc->post_load) { 3221 ret = vdc->post_load(vdev); 3222 if (ret) { 3223 return ret; 3224 } 3225 } 3226 3227 return 0; 3228 } 3229 3230 void virtio_cleanup(VirtIODevice *vdev) 3231 { 3232 qemu_del_vm_change_state_handler(vdev->vmstate); 3233 } 3234 3235 static void virtio_vmstate_change(void *opaque, bool running, RunState state) 3236 { 3237 VirtIODevice *vdev = opaque; 3238 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3239 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3240 bool backend_run = running && virtio_device_started(vdev, vdev->status); 3241 vdev->vm_running = running; 3242 3243 if (backend_run) { 3244 virtio_set_status(vdev, vdev->status); 3245 } 3246 3247 if (k->vmstate_change) { 3248 k->vmstate_change(qbus->parent, backend_run); 3249 } 3250 3251 if (!backend_run) { 3252 virtio_set_status(vdev, vdev->status); 3253 } 3254 } 3255 3256 void virtio_instance_init_common(Object *proxy_obj, void *data, 3257 size_t vdev_size, const char *vdev_name) 3258 { 3259 DeviceState *vdev = data; 3260 3261 object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev, 3262 vdev_size, vdev_name, &error_abort, 3263 NULL); 3264 qdev_alias_all_properties(vdev, proxy_obj); 3265 } 3266 3267 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size) 3268 { 3269 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3270 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3271 int i; 3272 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0; 3273 3274 if (nvectors) { 3275 vdev->vector_queues = 3276 g_malloc0(sizeof(*vdev->vector_queues) * nvectors); 3277 } 3278 3279 vdev->start_on_kick = false; 3280 vdev->started = false; 3281 vdev->vhost_started = false; 3282 vdev->device_id = device_id; 3283 vdev->status = 0; 3284 qatomic_set(&vdev->isr, 0); 3285 vdev->queue_sel = 0; 3286 vdev->config_vector = VIRTIO_NO_VECTOR; 3287 vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX); 3288 vdev->vm_running = runstate_is_running(); 3289 vdev->broken = false; 3290 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3291 vdev->vq[i].vector = VIRTIO_NO_VECTOR; 3292 vdev->vq[i].vdev = vdev; 3293 vdev->vq[i].queue_index = i; 3294 vdev->vq[i].host_notifier_enabled = false; 3295 } 3296 3297 vdev->name = virtio_id_to_name(device_id); 3298 vdev->config_len = config_size; 3299 if (vdev->config_len) { 3300 vdev->config = g_malloc0(config_size); 3301 } else { 3302 vdev->config = NULL; 3303 } 3304 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), 3305 virtio_vmstate_change, vdev); 3306 vdev->device_endian = virtio_default_endian(); 3307 vdev->use_guest_notifier_mask = true; 3308 } 3309 3310 /* 3311 * Only devices that have already been around prior to defining the virtio 3312 * standard support legacy mode; this includes devices not specified in the 3313 * standard. All newer devices conform to the virtio standard only. 3314 */ 3315 bool virtio_legacy_allowed(VirtIODevice *vdev) 3316 { 3317 switch (vdev->device_id) { 3318 case VIRTIO_ID_NET: 3319 case VIRTIO_ID_BLOCK: 3320 case VIRTIO_ID_CONSOLE: 3321 case VIRTIO_ID_RNG: 3322 case VIRTIO_ID_BALLOON: 3323 case VIRTIO_ID_RPMSG: 3324 case VIRTIO_ID_SCSI: 3325 case VIRTIO_ID_9P: 3326 case VIRTIO_ID_RPROC_SERIAL: 3327 case VIRTIO_ID_CAIF: 3328 return true; 3329 default: 3330 return false; 3331 } 3332 } 3333 3334 bool virtio_legacy_check_disabled(VirtIODevice *vdev) 3335 { 3336 return vdev->disable_legacy_check; 3337 } 3338 3339 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) 3340 { 3341 return vdev->vq[n].vring.desc; 3342 } 3343 3344 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n) 3345 { 3346 return virtio_queue_get_desc_addr(vdev, n) != 0; 3347 } 3348 3349 bool virtio_queue_enabled(VirtIODevice *vdev, int n) 3350 { 3351 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3352 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3353 3354 if (k->queue_enabled) { 3355 return k->queue_enabled(qbus->parent, n); 3356 } 3357 return virtio_queue_enabled_legacy(vdev, n); 3358 } 3359 3360 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) 3361 { 3362 return vdev->vq[n].vring.avail; 3363 } 3364 3365 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) 3366 { 3367 return vdev->vq[n].vring.used; 3368 } 3369 3370 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) 3371 { 3372 return sizeof(VRingDesc) * vdev->vq[n].vring.num; 3373 } 3374 3375 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) 3376 { 3377 int s; 3378 3379 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3380 return sizeof(struct VRingPackedDescEvent); 3381 } 3382 3383 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 3384 return offsetof(VRingAvail, ring) + 3385 sizeof(uint16_t) * vdev->vq[n].vring.num + s; 3386 } 3387 3388 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) 3389 { 3390 int s; 3391 3392 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3393 return sizeof(struct VRingPackedDescEvent); 3394 } 3395 3396 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 3397 return offsetof(VRingUsed, ring) + 3398 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; 3399 } 3400 3401 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev, 3402 int n) 3403 { 3404 unsigned int avail, used; 3405 3406 avail = vdev->vq[n].last_avail_idx; 3407 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15; 3408 3409 used = vdev->vq[n].used_idx; 3410 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15; 3411 3412 return avail | used << 16; 3413 } 3414 3415 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, 3416 int n) 3417 { 3418 return vdev->vq[n].last_avail_idx; 3419 } 3420 3421 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) 3422 { 3423 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3424 return virtio_queue_packed_get_last_avail_idx(vdev, n); 3425 } else { 3426 return virtio_queue_split_get_last_avail_idx(vdev, n); 3427 } 3428 } 3429 3430 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev, 3431 int n, unsigned int idx) 3432 { 3433 struct VirtQueue *vq = &vdev->vq[n]; 3434 3435 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff; 3436 vq->last_avail_wrap_counter = 3437 vq->shadow_avail_wrap_counter = !!(idx & 0x8000); 3438 idx >>= 16; 3439 vq->used_idx = idx & 0x7ffff; 3440 vq->used_wrap_counter = !!(idx & 0x8000); 3441 } 3442 3443 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev, 3444 int n, unsigned int idx) 3445 { 3446 vdev->vq[n].last_avail_idx = idx; 3447 vdev->vq[n].shadow_avail_idx = idx; 3448 } 3449 3450 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, 3451 unsigned int idx) 3452 { 3453 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3454 virtio_queue_packed_set_last_avail_idx(vdev, n, idx); 3455 } else { 3456 virtio_queue_split_set_last_avail_idx(vdev, n, idx); 3457 } 3458 } 3459 3460 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev, 3461 int n) 3462 { 3463 /* We don't have a reference like avail idx in shared memory */ 3464 return; 3465 } 3466 3467 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev, 3468 int n) 3469 { 3470 RCU_READ_LOCK_GUARD(); 3471 if (vdev->vq[n].vring.desc) { 3472 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]); 3473 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx; 3474 } 3475 } 3476 3477 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) 3478 { 3479 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3480 virtio_queue_packed_restore_last_avail_idx(vdev, n); 3481 } else { 3482 virtio_queue_split_restore_last_avail_idx(vdev, n); 3483 } 3484 } 3485 3486 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n) 3487 { 3488 /* used idx was updated through set_last_avail_idx() */ 3489 return; 3490 } 3491 3492 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n) 3493 { 3494 RCU_READ_LOCK_GUARD(); 3495 if (vdev->vq[n].vring.desc) { 3496 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]); 3497 } 3498 } 3499 3500 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) 3501 { 3502 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3503 return virtio_queue_packed_update_used_idx(vdev, n); 3504 } else { 3505 return virtio_split_packed_update_used_idx(vdev, n); 3506 } 3507 } 3508 3509 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) 3510 { 3511 vdev->vq[n].signalled_used_valid = false; 3512 } 3513 3514 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) 3515 { 3516 return vdev->vq + n; 3517 } 3518 3519 uint16_t virtio_get_queue_index(VirtQueue *vq) 3520 { 3521 return vq->queue_index; 3522 } 3523 3524 static void virtio_queue_guest_notifier_read(EventNotifier *n) 3525 { 3526 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); 3527 if (event_notifier_test_and_clear(n)) { 3528 virtio_irq(vq); 3529 } 3530 } 3531 3532 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, 3533 bool with_irqfd) 3534 { 3535 if (assign && !with_irqfd) { 3536 event_notifier_set_handler(&vq->guest_notifier, 3537 virtio_queue_guest_notifier_read); 3538 } else { 3539 event_notifier_set_handler(&vq->guest_notifier, NULL); 3540 } 3541 if (!assign) { 3542 /* Test and clear notifier before closing it, 3543 * in case poll callback didn't have time to run. */ 3544 virtio_queue_guest_notifier_read(&vq->guest_notifier); 3545 } 3546 } 3547 3548 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) 3549 { 3550 return &vq->guest_notifier; 3551 } 3552 3553 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n) 3554 { 3555 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3556 3557 virtio_queue_set_notification(vq, 0); 3558 } 3559 3560 static bool virtio_queue_host_notifier_aio_poll(void *opaque) 3561 { 3562 EventNotifier *n = opaque; 3563 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3564 3565 return vq->vring.desc && !virtio_queue_empty(vq); 3566 } 3567 3568 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n) 3569 { 3570 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3571 3572 virtio_queue_notify_vq(vq); 3573 } 3574 3575 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) 3576 { 3577 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3578 3579 /* Caller polls once more after this to catch requests that race with us */ 3580 virtio_queue_set_notification(vq, 1); 3581 } 3582 3583 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) 3584 { 3585 aio_set_event_notifier(ctx, &vq->host_notifier, true, 3586 virtio_queue_host_notifier_read, 3587 virtio_queue_host_notifier_aio_poll, 3588 virtio_queue_host_notifier_aio_poll_ready); 3589 aio_set_event_notifier_poll(ctx, &vq->host_notifier, 3590 virtio_queue_host_notifier_aio_poll_begin, 3591 virtio_queue_host_notifier_aio_poll_end); 3592 } 3593 3594 /* 3595 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use 3596 * this for rx virtqueues and similar cases where the virtqueue handler 3597 * function does not pop all elements. When the virtqueue is left non-empty 3598 * polling consumes CPU cycles and should not be used. 3599 */ 3600 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) 3601 { 3602 aio_set_event_notifier(ctx, &vq->host_notifier, true, 3603 virtio_queue_host_notifier_read, 3604 NULL, NULL); 3605 } 3606 3607 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) 3608 { 3609 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); 3610 /* Test and clear notifier before after disabling event, 3611 * in case poll callback didn't have time to run. */ 3612 virtio_queue_host_notifier_read(&vq->host_notifier); 3613 } 3614 3615 void virtio_queue_host_notifier_read(EventNotifier *n) 3616 { 3617 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3618 if (event_notifier_test_and_clear(n)) { 3619 virtio_queue_notify_vq(vq); 3620 } 3621 } 3622 3623 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) 3624 { 3625 return &vq->host_notifier; 3626 } 3627 3628 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) 3629 { 3630 vq->host_notifier_enabled = enabled; 3631 } 3632 3633 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n, 3634 MemoryRegion *mr, bool assign) 3635 { 3636 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3637 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3638 3639 if (k->set_host_notifier_mr) { 3640 return k->set_host_notifier_mr(qbus->parent, n, mr, assign); 3641 } 3642 3643 return -1; 3644 } 3645 3646 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) 3647 { 3648 g_free(vdev->bus_name); 3649 vdev->bus_name = g_strdup(bus_name); 3650 } 3651 3652 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...) 3653 { 3654 va_list ap; 3655 3656 va_start(ap, fmt); 3657 error_vreport(fmt, ap); 3658 va_end(ap); 3659 3660 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3661 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET; 3662 virtio_notify_config(vdev); 3663 } 3664 3665 vdev->broken = true; 3666 } 3667 3668 static void virtio_memory_listener_commit(MemoryListener *listener) 3669 { 3670 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener); 3671 int i; 3672 3673 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3674 if (vdev->vq[i].vring.num == 0) { 3675 break; 3676 } 3677 virtio_init_region_cache(vdev, i); 3678 } 3679 } 3680 3681 static void virtio_device_realize(DeviceState *dev, Error **errp) 3682 { 3683 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3684 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3685 Error *err = NULL; 3686 3687 /* Devices should either use vmsd or the load/save methods */ 3688 assert(!vdc->vmsd || !vdc->load); 3689 3690 if (vdc->realize != NULL) { 3691 vdc->realize(dev, &err); 3692 if (err != NULL) { 3693 error_propagate(errp, err); 3694 return; 3695 } 3696 } 3697 3698 virtio_bus_device_plugged(vdev, &err); 3699 if (err != NULL) { 3700 error_propagate(errp, err); 3701 vdc->unrealize(dev); 3702 return; 3703 } 3704 3705 vdev->listener.commit = virtio_memory_listener_commit; 3706 vdev->listener.name = "virtio"; 3707 memory_listener_register(&vdev->listener, vdev->dma_as); 3708 } 3709 3710 static void virtio_device_unrealize(DeviceState *dev) 3711 { 3712 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3713 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3714 3715 memory_listener_unregister(&vdev->listener); 3716 virtio_bus_device_unplugged(vdev); 3717 3718 if (vdc->unrealize != NULL) { 3719 vdc->unrealize(dev); 3720 } 3721 3722 g_free(vdev->bus_name); 3723 vdev->bus_name = NULL; 3724 } 3725 3726 static void virtio_device_free_virtqueues(VirtIODevice *vdev) 3727 { 3728 int i; 3729 if (!vdev->vq) { 3730 return; 3731 } 3732 3733 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3734 if (vdev->vq[i].vring.num == 0) { 3735 break; 3736 } 3737 virtio_virtqueue_reset_region_cache(&vdev->vq[i]); 3738 } 3739 g_free(vdev->vq); 3740 } 3741 3742 static void virtio_device_instance_finalize(Object *obj) 3743 { 3744 VirtIODevice *vdev = VIRTIO_DEVICE(obj); 3745 3746 virtio_device_free_virtqueues(vdev); 3747 3748 g_free(vdev->config); 3749 g_free(vdev->vector_queues); 3750 } 3751 3752 static Property virtio_properties[] = { 3753 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), 3754 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), 3755 DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), 3756 DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, 3757 disable_legacy_check, false), 3758 DEFINE_PROP_END_OF_LIST(), 3759 }; 3760 3761 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) 3762 { 3763 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); 3764 int i, n, r, err; 3765 3766 /* 3767 * Batch all the host notifiers in a single transaction to avoid 3768 * quadratic time complexity in address_space_update_ioeventfds(). 3769 */ 3770 memory_region_transaction_begin(); 3771 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3772 VirtQueue *vq = &vdev->vq[n]; 3773 if (!virtio_queue_get_num(vdev, n)) { 3774 continue; 3775 } 3776 r = virtio_bus_set_host_notifier(qbus, n, true); 3777 if (r < 0) { 3778 err = r; 3779 goto assign_error; 3780 } 3781 event_notifier_set_handler(&vq->host_notifier, 3782 virtio_queue_host_notifier_read); 3783 } 3784 3785 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3786 /* Kick right away to begin processing requests already in vring */ 3787 VirtQueue *vq = &vdev->vq[n]; 3788 if (!vq->vring.num) { 3789 continue; 3790 } 3791 event_notifier_set(&vq->host_notifier); 3792 } 3793 memory_region_transaction_commit(); 3794 return 0; 3795 3796 assign_error: 3797 i = n; /* save n for a second iteration after transaction is committed. */ 3798 while (--n >= 0) { 3799 VirtQueue *vq = &vdev->vq[n]; 3800 if (!virtio_queue_get_num(vdev, n)) { 3801 continue; 3802 } 3803 3804 event_notifier_set_handler(&vq->host_notifier, NULL); 3805 r = virtio_bus_set_host_notifier(qbus, n, false); 3806 assert(r >= 0); 3807 } 3808 /* 3809 * The transaction expects the ioeventfds to be open when it 3810 * commits. Do it now, before the cleanup loop. 3811 */ 3812 memory_region_transaction_commit(); 3813 3814 while (--i >= 0) { 3815 if (!virtio_queue_get_num(vdev, i)) { 3816 continue; 3817 } 3818 virtio_bus_cleanup_host_notifier(qbus, i); 3819 } 3820 return err; 3821 } 3822 3823 int virtio_device_start_ioeventfd(VirtIODevice *vdev) 3824 { 3825 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3826 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3827 3828 return virtio_bus_start_ioeventfd(vbus); 3829 } 3830 3831 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) 3832 { 3833 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); 3834 int n, r; 3835 3836 /* 3837 * Batch all the host notifiers in a single transaction to avoid 3838 * quadratic time complexity in address_space_update_ioeventfds(). 3839 */ 3840 memory_region_transaction_begin(); 3841 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3842 VirtQueue *vq = &vdev->vq[n]; 3843 3844 if (!virtio_queue_get_num(vdev, n)) { 3845 continue; 3846 } 3847 event_notifier_set_handler(&vq->host_notifier, NULL); 3848 r = virtio_bus_set_host_notifier(qbus, n, false); 3849 assert(r >= 0); 3850 } 3851 /* 3852 * The transaction expects the ioeventfds to be open when it 3853 * commits. Do it now, before the cleanup loop. 3854 */ 3855 memory_region_transaction_commit(); 3856 3857 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3858 if (!virtio_queue_get_num(vdev, n)) { 3859 continue; 3860 } 3861 virtio_bus_cleanup_host_notifier(qbus, n); 3862 } 3863 } 3864 3865 int virtio_device_grab_ioeventfd(VirtIODevice *vdev) 3866 { 3867 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3868 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3869 3870 return virtio_bus_grab_ioeventfd(vbus); 3871 } 3872 3873 void virtio_device_release_ioeventfd(VirtIODevice *vdev) 3874 { 3875 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3876 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3877 3878 virtio_bus_release_ioeventfd(vbus); 3879 } 3880 3881 static void virtio_device_class_init(ObjectClass *klass, void *data) 3882 { 3883 /* Set the default value here. */ 3884 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3885 DeviceClass *dc = DEVICE_CLASS(klass); 3886 3887 dc->realize = virtio_device_realize; 3888 dc->unrealize = virtio_device_unrealize; 3889 dc->bus_type = TYPE_VIRTIO_BUS; 3890 device_class_set_props(dc, virtio_properties); 3891 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl; 3892 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; 3893 3894 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; 3895 } 3896 3897 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) 3898 { 3899 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3900 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3901 3902 return virtio_bus_ioeventfd_enabled(vbus); 3903 } 3904 3905 static const TypeInfo virtio_device_info = { 3906 .name = TYPE_VIRTIO_DEVICE, 3907 .parent = TYPE_DEVICE, 3908 .instance_size = sizeof(VirtIODevice), 3909 .class_init = virtio_device_class_init, 3910 .instance_finalize = virtio_device_instance_finalize, 3911 .abstract = true, 3912 .class_size = sizeof(VirtioDeviceClass), 3913 }; 3914 3915 static void virtio_register_types(void) 3916 { 3917 type_register_static(&virtio_device_info); 3918 } 3919 3920 type_init(virtio_register_types) 3921