/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
#include "sysemu/runstate.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
} VRingPackedDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
    VRingMemoryRegionCaches *caches;
} VRing;

typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;

struct VirtQueue
{
    VRing vring;
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid. */
    bool signalled_used_valid;

    /* Notification enabled?
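     * Set by virtio_queue_set_notification() and checked by
     * vring_set_avail_event() before writing the avail event.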
*/ 119 bool notification; 120 121 uint16_t queue_index; 122 123 unsigned int inuse; 124 125 uint16_t vector; 126 VirtIOHandleOutput handle_output; 127 VirtIOHandleAIOOutput handle_aio_output; 128 VirtIODevice *vdev; 129 EventNotifier guest_notifier; 130 EventNotifier host_notifier; 131 QLIST_ENTRY(VirtQueue) node; 132 }; 133 134 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) 135 { 136 if (!caches) { 137 return; 138 } 139 140 address_space_cache_destroy(&caches->desc); 141 address_space_cache_destroy(&caches->avail); 142 address_space_cache_destroy(&caches->used); 143 g_free(caches); 144 } 145 146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq) 147 { 148 VRingMemoryRegionCaches *caches; 149 150 caches = atomic_read(&vq->vring.caches); 151 atomic_rcu_set(&vq->vring.caches, NULL); 152 if (caches) { 153 call_rcu(caches, virtio_free_region_cache, rcu); 154 } 155 } 156 157 static void virtio_init_region_cache(VirtIODevice *vdev, int n) 158 { 159 VirtQueue *vq = &vdev->vq[n]; 160 VRingMemoryRegionCaches *old = vq->vring.caches; 161 VRingMemoryRegionCaches *new = NULL; 162 hwaddr addr, size; 163 int64_t len; 164 bool packed; 165 166 167 addr = vq->vring.desc; 168 if (!addr) { 169 goto out_no_cache; 170 } 171 new = g_new0(VRingMemoryRegionCaches, 1); 172 size = virtio_queue_get_desc_size(vdev, n); 173 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 174 true : false; 175 len = address_space_cache_init(&new->desc, vdev->dma_as, 176 addr, size, packed); 177 if (len < size) { 178 virtio_error(vdev, "Cannot map desc"); 179 goto err_desc; 180 } 181 182 size = virtio_queue_get_used_size(vdev, n); 183 len = address_space_cache_init(&new->used, vdev->dma_as, 184 vq->vring.used, size, true); 185 if (len < size) { 186 virtio_error(vdev, "Cannot map used"); 187 goto err_used; 188 } 189 190 size = virtio_queue_get_avail_size(vdev, n); 191 len = address_space_cache_init(&new->avail, vdev->dma_as, 192 vq->vring.avail, size, false); 193 if (len < size) { 194 virtio_error(vdev, "Cannot map avail"); 195 goto err_avail; 196 } 197 198 atomic_rcu_set(&vq->vring.caches, new); 199 if (old) { 200 call_rcu(old, virtio_free_region_cache, rcu); 201 } 202 return; 203 204 err_avail: 205 address_space_cache_destroy(&new->avail); 206 err_used: 207 address_space_cache_destroy(&new->used); 208 err_desc: 209 address_space_cache_destroy(&new->desc); 210 out_no_cache: 211 g_free(new); 212 virtio_virtqueue_reset_region_cache(vq); 213 } 214 215 /* virt queue functions */ 216 void virtio_queue_update_rings(VirtIODevice *vdev, int n) 217 { 218 VRing *vring = &vdev->vq[n].vring; 219 220 if (!vring->num || !vring->desc || !vring->align) { 221 /* not yet setup -> nothing to do */ 222 return; 223 } 224 vring->avail = vring->desc + vring->num * sizeof(VRingDesc); 225 vring->used = vring_align(vring->avail + 226 offsetof(VRingAvail, ring[vring->num]), 227 vring->align); 228 virtio_init_region_cache(vdev, n); 229 } 230 231 /* Called within rcu_read_lock(). 
*/ 232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc, 233 MemoryRegionCache *cache, int i) 234 { 235 address_space_read_cached(cache, i * sizeof(VRingDesc), 236 desc, sizeof(VRingDesc)); 237 virtio_tswap64s(vdev, &desc->addr); 238 virtio_tswap32s(vdev, &desc->len); 239 virtio_tswap16s(vdev, &desc->flags); 240 virtio_tswap16s(vdev, &desc->next); 241 } 242 243 static void vring_packed_event_read(VirtIODevice *vdev, 244 MemoryRegionCache *cache, 245 VRingPackedDescEvent *e) 246 { 247 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); 248 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); 249 250 address_space_read_cached(cache, off_flags, &e->flags, 251 sizeof(e->flags)); 252 /* Make sure flags is seen before off_wrap */ 253 smp_rmb(); 254 address_space_read_cached(cache, off_off, &e->off_wrap, 255 sizeof(e->off_wrap)); 256 virtio_tswap16s(vdev, &e->off_wrap); 257 virtio_tswap16s(vdev, &e->flags); 258 } 259 260 static void vring_packed_off_wrap_write(VirtIODevice *vdev, 261 MemoryRegionCache *cache, 262 uint16_t off_wrap) 263 { 264 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); 265 266 virtio_tswap16s(vdev, &off_wrap); 267 address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); 268 address_space_cache_invalidate(cache, off, sizeof(off_wrap)); 269 } 270 271 static void vring_packed_flags_write(VirtIODevice *vdev, 272 MemoryRegionCache *cache, uint16_t flags) 273 { 274 hwaddr off = offsetof(VRingPackedDescEvent, flags); 275 276 virtio_tswap16s(vdev, &flags); 277 address_space_write_cached(cache, off, &flags, sizeof(flags)); 278 address_space_cache_invalidate(cache, off, sizeof(flags)); 279 } 280 281 /* Called within rcu_read_lock(). */ 282 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) 283 { 284 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); 285 assert(caches != NULL); 286 return caches; 287 } 288 /* Called within rcu_read_lock(). */ 289 static inline uint16_t vring_avail_flags(VirtQueue *vq) 290 { 291 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 292 hwaddr pa = offsetof(VRingAvail, flags); 293 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 294 } 295 296 /* Called within rcu_read_lock(). */ 297 static inline uint16_t vring_avail_idx(VirtQueue *vq) 298 { 299 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 300 hwaddr pa = offsetof(VRingAvail, idx); 301 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 302 return vq->shadow_avail_idx; 303 } 304 305 /* Called within rcu_read_lock(). */ 306 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) 307 { 308 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 309 hwaddr pa = offsetof(VRingAvail, ring[i]); 310 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); 311 } 312 313 /* Called within rcu_read_lock(). */ 314 static inline uint16_t vring_get_used_event(VirtQueue *vq) 315 { 316 return vring_avail_ring(vq, vq->vring.num); 317 } 318 319 /* Called within rcu_read_lock(). 
*/ 320 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, 321 int i) 322 { 323 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 324 hwaddr pa = offsetof(VRingUsed, ring[i]); 325 virtio_tswap32s(vq->vdev, &uelem->id); 326 virtio_tswap32s(vq->vdev, &uelem->len); 327 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem)); 328 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem)); 329 } 330 331 /* Called within rcu_read_lock(). */ 332 static uint16_t vring_used_idx(VirtQueue *vq) 333 { 334 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 335 hwaddr pa = offsetof(VRingUsed, idx); 336 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 337 } 338 339 /* Called within rcu_read_lock(). */ 340 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) 341 { 342 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 343 hwaddr pa = offsetof(VRingUsed, idx); 344 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); 345 address_space_cache_invalidate(&caches->used, pa, sizeof(val)); 346 vq->used_idx = val; 347 } 348 349 /* Called within rcu_read_lock(). */ 350 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) 351 { 352 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 353 VirtIODevice *vdev = vq->vdev; 354 hwaddr pa = offsetof(VRingUsed, flags); 355 uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 356 357 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask); 358 address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); 359 } 360 361 /* Called within rcu_read_lock(). */ 362 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) 363 { 364 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); 365 VirtIODevice *vdev = vq->vdev; 366 hwaddr pa = offsetof(VRingUsed, flags); 367 uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); 368 369 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask); 370 address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); 371 } 372 373 /* Called within rcu_read_lock(). */ 374 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) 375 { 376 VRingMemoryRegionCaches *caches; 377 hwaddr pa; 378 if (!vq->notification) { 379 return; 380 } 381 382 caches = vring_get_region_caches(vq); 383 pa = offsetof(VRingUsed, ring[vq->vring.num]); 384 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); 385 address_space_cache_invalidate(&caches->used, pa, sizeof(val)); 386 } 387 388 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable) 389 { 390 rcu_read_lock(); 391 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 392 vring_set_avail_event(vq, vring_avail_idx(vq)); 393 } else if (enable) { 394 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); 395 } else { 396 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); 397 } 398 if (enable) { 399 /* Expose avail event/used flags before caller checks the avail idx. 
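         * Conceptually this pairs with the guest driver, which publishes new
         * avail ring entries and the avail idx and only then reads the used
         * flags / avail event to decide whether a kick is needed.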
         */
        smp_mb();
    }
    rcu_read_unlock();
}

static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    rcu_read_lock();
    caches = vring_get_region_caches(vq);
    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
    rcu_read_unlock();
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}
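
/*
 * Illustrative sketch (not part of the original file): device handlers
 * commonly disable guest notifications while draining the queue and
 * re-check for new buffers after re-enabling them, so that a kick that
 * races with the disable window is not lost.  "vdev" and "len" below
 * stand for values supplied by the caller:
 *
 *     VirtQueueElement *elem;
 *     do {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             // ... process the request ...
 *             virtqueue_push(vq, elem, len);
 *             virtio_notify(vdev, vq);
 *             g_free(elem);
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *     } while (!virtio_queue_empty(vq));
 */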

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                         uint16_t *flags,
                                         MemoryRegionCache *cache,
                                         int i)
{
    address_space_read_cached(cache,
                              i * sizeof(VRingPackedDesc) +
                              offsetof(VRingPackedDesc, flags),
                              flags, sizeof(*flags));
    virtio_tswap16s(vdev, flags);
}

static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the rest of the fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}

static void vring_packed_desc_write_data(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off_id = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, id);
    hwaddr off_len = i * sizeof(VRingPackedDesc) +
                     offsetof(VRingPackedDesc, len);

    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->id);
    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
}

static void vring_packed_desc_write_flags(VirtIODevice *vdev,
                                          VRingPackedDesc *desc,
                                          MemoryRegionCache *cache,
                                          int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    virtio_tswap16s(vdev, &desc->flags);
    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
}

static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}

static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail, used;

    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
    return (avail != used) && (avail == wrap_counter);
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 * Called within rcu_read_lock().
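 * Note that vring_avail_idx() also refreshes vq->shadow_avail_idx as a
 * side effect of the read below.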
*/ 547 static int virtio_queue_empty_rcu(VirtQueue *vq) 548 { 549 if (unlikely(vq->vdev->broken)) { 550 return 1; 551 } 552 553 if (unlikely(!vq->vring.avail)) { 554 return 1; 555 } 556 557 if (vq->shadow_avail_idx != vq->last_avail_idx) { 558 return 0; 559 } 560 561 return vring_avail_idx(vq) == vq->last_avail_idx; 562 } 563 564 static int virtio_queue_split_empty(VirtQueue *vq) 565 { 566 bool empty; 567 568 if (unlikely(vq->vdev->broken)) { 569 return 1; 570 } 571 572 if (unlikely(!vq->vring.avail)) { 573 return 1; 574 } 575 576 if (vq->shadow_avail_idx != vq->last_avail_idx) { 577 return 0; 578 } 579 580 rcu_read_lock(); 581 empty = vring_avail_idx(vq) == vq->last_avail_idx; 582 rcu_read_unlock(); 583 return empty; 584 } 585 586 static int virtio_queue_packed_empty_rcu(VirtQueue *vq) 587 { 588 struct VRingPackedDesc desc; 589 VRingMemoryRegionCaches *cache; 590 591 if (unlikely(!vq->vring.desc)) { 592 return 1; 593 } 594 595 cache = vring_get_region_caches(vq); 596 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc, 597 vq->last_avail_idx); 598 599 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter); 600 } 601 602 static int virtio_queue_packed_empty(VirtQueue *vq) 603 { 604 bool empty; 605 606 rcu_read_lock(); 607 empty = virtio_queue_packed_empty_rcu(vq); 608 rcu_read_unlock(); 609 return empty; 610 } 611 612 int virtio_queue_empty(VirtQueue *vq) 613 { 614 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 615 return virtio_queue_packed_empty(vq); 616 } else { 617 return virtio_queue_split_empty(vq); 618 } 619 } 620 621 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 622 unsigned int len) 623 { 624 AddressSpace *dma_as = vq->vdev->dma_as; 625 unsigned int offset; 626 int i; 627 628 offset = 0; 629 for (i = 0; i < elem->in_num; i++) { 630 size_t size = MIN(len - offset, elem->in_sg[i].iov_len); 631 632 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base, 633 elem->in_sg[i].iov_len, 634 DMA_DIRECTION_FROM_DEVICE, size); 635 636 offset += size; 637 } 638 639 for (i = 0; i < elem->out_num; i++) 640 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base, 641 elem->out_sg[i].iov_len, 642 DMA_DIRECTION_TO_DEVICE, 643 elem->out_sg[i].iov_len); 644 } 645 646 /* virtqueue_detach_element: 647 * @vq: The #VirtQueue 648 * @elem: The #VirtQueueElement 649 * @len: number of bytes written 650 * 651 * Detach the element from the virtqueue. This function is suitable for device 652 * reset or other situations where a #VirtQueueElement is simply freed and will 653 * not be pushed or discarded. 654 */ 655 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, 656 unsigned int len) 657 { 658 vq->inuse -= elem->ndescs; 659 virtqueue_unmap_sg(vq, elem, len); 660 } 661 662 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num) 663 { 664 vq->last_avail_idx -= num; 665 } 666 667 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num) 668 { 669 if (vq->last_avail_idx < num) { 670 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num; 671 vq->last_avail_wrap_counter ^= 1; 672 } else { 673 vq->last_avail_idx -= num; 674 } 675 } 676 677 /* virtqueue_unpop: 678 * @vq: The #VirtQueue 679 * @elem: The #VirtQueueElement 680 * @len: number of bytes written 681 * 682 * Pretend the most recent element wasn't popped from the virtqueue. The next 683 * call to virtqueue_pop() will refetch the element. 
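 *
 * Illustrative use (backend_busy() is a placeholder, not part of this file):
 * a device that pops an element but cannot service it yet can hand it back
 * and retry on the next kick:
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem && backend_busy()) {
 *         virtqueue_unpop(vq, elem, 0);
 *         g_free(elem);
 *     }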
684 */ 685 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, 686 unsigned int len) 687 { 688 689 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 690 virtqueue_packed_rewind(vq, 1); 691 } else { 692 virtqueue_split_rewind(vq, 1); 693 } 694 695 virtqueue_detach_element(vq, elem, len); 696 } 697 698 /* virtqueue_rewind: 699 * @vq: The #VirtQueue 700 * @num: Number of elements to push back 701 * 702 * Pretend that elements weren't popped from the virtqueue. The next 703 * virtqueue_pop() will refetch the oldest element. 704 * 705 * Use virtqueue_unpop() instead if you have a VirtQueueElement. 706 * 707 * Returns: true on success, false if @num is greater than the number of in use 708 * elements. 709 */ 710 bool virtqueue_rewind(VirtQueue *vq, unsigned int num) 711 { 712 if (num > vq->inuse) { 713 return false; 714 } 715 716 vq->inuse -= num; 717 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 718 virtqueue_packed_rewind(vq, num); 719 } else { 720 virtqueue_split_rewind(vq, num); 721 } 722 return true; 723 } 724 725 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, 726 unsigned int len, unsigned int idx) 727 { 728 VRingUsedElem uelem; 729 730 if (unlikely(!vq->vring.used)) { 731 return; 732 } 733 734 idx = (idx + vq->used_idx) % vq->vring.num; 735 736 uelem.id = elem->index; 737 uelem.len = len; 738 vring_used_write(vq, &uelem, idx); 739 } 740 741 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, 742 unsigned int len, unsigned int idx) 743 { 744 vq->used_elems[idx].index = elem->index; 745 vq->used_elems[idx].len = len; 746 vq->used_elems[idx].ndescs = elem->ndescs; 747 } 748 749 static void virtqueue_packed_fill_desc(VirtQueue *vq, 750 const VirtQueueElement *elem, 751 unsigned int idx, 752 bool strict_order) 753 { 754 uint16_t head; 755 VRingMemoryRegionCaches *caches; 756 VRingPackedDesc desc = { 757 .id = elem->index, 758 .len = elem->len, 759 }; 760 bool wrap_counter = vq->used_wrap_counter; 761 762 if (unlikely(!vq->vring.desc)) { 763 return; 764 } 765 766 head = vq->used_idx + idx; 767 if (head >= vq->vring.num) { 768 head -= vq->vring.num; 769 wrap_counter ^= 1; 770 } 771 if (wrap_counter) { 772 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL); 773 desc.flags |= (1 << VRING_PACKED_DESC_F_USED); 774 } else { 775 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); 776 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED); 777 } 778 779 caches = vring_get_region_caches(vq); 780 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order); 781 } 782 783 /* Called within rcu_read_lock(). */ 784 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, 785 unsigned int len, unsigned int idx) 786 { 787 trace_virtqueue_fill(vq, elem, len, idx); 788 789 virtqueue_unmap_sg(vq, elem, len); 790 791 if (unlikely(vq->vdev->broken)) { 792 return; 793 } 794 795 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 796 virtqueue_packed_fill(vq, elem, len, idx); 797 } else { 798 virtqueue_split_fill(vq, elem, len, idx); 799 } 800 } 801 802 /* Called within rcu_read_lock(). */ 803 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) 804 { 805 uint16_t old, new; 806 807 if (unlikely(!vq->vring.used)) { 808 return; 809 } 810 811 /* Make sure buffer is written before we update index. 
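     * The barrier pairs with the guest driver's read barrier between loading
     * the used idx and reading the used ring entries it covers.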
*/ 812 smp_wmb(); 813 trace_virtqueue_flush(vq, count); 814 old = vq->used_idx; 815 new = old + count; 816 vring_used_idx_set(vq, new); 817 vq->inuse -= count; 818 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) 819 vq->signalled_used_valid = false; 820 } 821 822 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) 823 { 824 unsigned int i, ndescs = 0; 825 826 if (unlikely(!vq->vring.desc)) { 827 return; 828 } 829 830 for (i = 1; i < count; i++) { 831 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false); 832 ndescs += vq->used_elems[i].ndescs; 833 } 834 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true); 835 ndescs += vq->used_elems[0].ndescs; 836 837 vq->inuse -= ndescs; 838 vq->used_idx += ndescs; 839 if (vq->used_idx >= vq->vring.num) { 840 vq->used_idx -= vq->vring.num; 841 vq->used_wrap_counter ^= 1; 842 } 843 } 844 845 void virtqueue_flush(VirtQueue *vq, unsigned int count) 846 { 847 if (unlikely(vq->vdev->broken)) { 848 vq->inuse -= count; 849 return; 850 } 851 852 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 853 virtqueue_packed_flush(vq, count); 854 } else { 855 virtqueue_split_flush(vq, count); 856 } 857 } 858 859 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, 860 unsigned int len) 861 { 862 rcu_read_lock(); 863 virtqueue_fill(vq, elem, len, 0); 864 virtqueue_flush(vq, 1); 865 rcu_read_unlock(); 866 } 867 868 /* Called within rcu_read_lock(). */ 869 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) 870 { 871 uint16_t num_heads = vring_avail_idx(vq) - idx; 872 873 /* Check it isn't doing very strange things with descriptor numbers. */ 874 if (num_heads > vq->vring.num) { 875 virtio_error(vq->vdev, "Guest moved used index from %u to %u", 876 idx, vq->shadow_avail_idx); 877 return -EINVAL; 878 } 879 /* On success, callers read a descriptor at vq->last_avail_idx. 880 * Make sure descriptor read does not bypass avail index read. */ 881 if (num_heads) { 882 smp_rmb(); 883 } 884 885 return num_heads; 886 } 887 888 /* Called within rcu_read_lock(). */ 889 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, 890 unsigned int *head) 891 { 892 /* Grab the next descriptor number they're advertising, and increment 893 * the index we've seen. */ 894 *head = vring_avail_ring(vq, idx % vq->vring.num); 895 896 /* If their number is silly, that's a fatal mistake. */ 897 if (*head >= vq->vring.num) { 898 virtio_error(vq->vdev, "Guest says index %u is available", *head); 899 return false; 900 } 901 902 return true; 903 } 904 905 enum { 906 VIRTQUEUE_READ_DESC_ERROR = -1, 907 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ 908 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ 909 }; 910 911 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, 912 MemoryRegionCache *desc_cache, 913 unsigned int max, unsigned int *next) 914 { 915 /* If this descriptor says it doesn't chain, we're done. */ 916 if (!(desc->flags & VRING_DESC_F_NEXT)) { 917 return VIRTQUEUE_READ_DESC_DONE; 918 } 919 920 /* Check they're not leading us off end of descriptors. */ 921 *next = desc->next; 922 /* Make sure compiler knows to grab that: we don't want it changing! 
*/ 923 smp_wmb(); 924 925 if (*next >= max) { 926 virtio_error(vdev, "Desc next is %u", *next); 927 return VIRTQUEUE_READ_DESC_ERROR; 928 } 929 930 vring_split_desc_read(vdev, desc, desc_cache, *next); 931 return VIRTQUEUE_READ_DESC_MORE; 932 } 933 934 static void virtqueue_split_get_avail_bytes(VirtQueue *vq, 935 unsigned int *in_bytes, unsigned int *out_bytes, 936 unsigned max_in_bytes, unsigned max_out_bytes) 937 { 938 VirtIODevice *vdev = vq->vdev; 939 unsigned int max, idx; 940 unsigned int total_bufs, in_total, out_total; 941 VRingMemoryRegionCaches *caches; 942 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 943 int64_t len = 0; 944 int rc; 945 946 rcu_read_lock(); 947 idx = vq->last_avail_idx; 948 total_bufs = in_total = out_total = 0; 949 950 max = vq->vring.num; 951 caches = vring_get_region_caches(vq); 952 while ((rc = virtqueue_num_heads(vq, idx)) > 0) { 953 MemoryRegionCache *desc_cache = &caches->desc; 954 unsigned int num_bufs; 955 VRingDesc desc; 956 unsigned int i; 957 958 num_bufs = total_bufs; 959 960 if (!virtqueue_get_head(vq, idx++, &i)) { 961 goto err; 962 } 963 964 vring_split_desc_read(vdev, &desc, desc_cache, i); 965 966 if (desc.flags & VRING_DESC_F_INDIRECT) { 967 if (!desc.len || (desc.len % sizeof(VRingDesc))) { 968 virtio_error(vdev, "Invalid size for indirect buffer table"); 969 goto err; 970 } 971 972 /* If we've got too many, that implies a descriptor loop. */ 973 if (num_bufs >= max) { 974 virtio_error(vdev, "Looped descriptor"); 975 goto err; 976 } 977 978 /* loop over the indirect descriptor table */ 979 len = address_space_cache_init(&indirect_desc_cache, 980 vdev->dma_as, 981 desc.addr, desc.len, false); 982 desc_cache = &indirect_desc_cache; 983 if (len < desc.len) { 984 virtio_error(vdev, "Cannot map indirect buffer"); 985 goto err; 986 } 987 988 max = desc.len / sizeof(VRingDesc); 989 num_bufs = i = 0; 990 vring_split_desc_read(vdev, &desc, desc_cache, i); 991 } 992 993 do { 994 /* If we've got too many, that implies a descriptor loop. */ 995 if (++num_bufs > max) { 996 virtio_error(vdev, "Looped descriptor"); 997 goto err; 998 } 999 1000 if (desc.flags & VRING_DESC_F_WRITE) { 1001 in_total += desc.len; 1002 } else { 1003 out_total += desc.len; 1004 } 1005 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 1006 goto done; 1007 } 1008 1009 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); 1010 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1011 1012 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 1013 goto err; 1014 } 1015 1016 if (desc_cache == &indirect_desc_cache) { 1017 address_space_cache_destroy(&indirect_desc_cache); 1018 total_bufs++; 1019 } else { 1020 total_bufs = num_bufs; 1021 } 1022 } 1023 1024 if (rc < 0) { 1025 goto err; 1026 } 1027 1028 done: 1029 address_space_cache_destroy(&indirect_desc_cache); 1030 if (in_bytes) { 1031 *in_bytes = in_total; 1032 } 1033 if (out_bytes) { 1034 *out_bytes = out_total; 1035 } 1036 rcu_read_unlock(); 1037 return; 1038 1039 err: 1040 in_total = out_total = 0; 1041 goto done; 1042 } 1043 1044 static int virtqueue_packed_read_next_desc(VirtQueue *vq, 1045 VRingPackedDesc *desc, 1046 MemoryRegionCache 1047 *desc_cache, 1048 unsigned int max, 1049 unsigned int *next, 1050 bool indirect) 1051 { 1052 /* If this descriptor says it doesn't chain, we're done. 
*/ 1053 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) { 1054 return VIRTQUEUE_READ_DESC_DONE; 1055 } 1056 1057 ++*next; 1058 if (*next == max) { 1059 if (indirect) { 1060 return VIRTQUEUE_READ_DESC_DONE; 1061 } else { 1062 (*next) -= vq->vring.num; 1063 } 1064 } 1065 1066 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false); 1067 return VIRTQUEUE_READ_DESC_MORE; 1068 } 1069 1070 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, 1071 unsigned int *in_bytes, 1072 unsigned int *out_bytes, 1073 unsigned max_in_bytes, 1074 unsigned max_out_bytes) 1075 { 1076 VirtIODevice *vdev = vq->vdev; 1077 unsigned int max, idx; 1078 unsigned int total_bufs, in_total, out_total; 1079 MemoryRegionCache *desc_cache; 1080 VRingMemoryRegionCaches *caches; 1081 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1082 int64_t len = 0; 1083 VRingPackedDesc desc; 1084 bool wrap_counter; 1085 1086 rcu_read_lock(); 1087 idx = vq->last_avail_idx; 1088 wrap_counter = vq->last_avail_wrap_counter; 1089 total_bufs = in_total = out_total = 0; 1090 1091 max = vq->vring.num; 1092 caches = vring_get_region_caches(vq); 1093 1094 for (;;) { 1095 unsigned int num_bufs = total_bufs; 1096 unsigned int i = idx; 1097 int rc; 1098 1099 desc_cache = &caches->desc; 1100 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true); 1101 if (!is_desc_avail(desc.flags, wrap_counter)) { 1102 break; 1103 } 1104 1105 if (desc.flags & VRING_DESC_F_INDIRECT) { 1106 if (desc.len % sizeof(VRingPackedDesc)) { 1107 virtio_error(vdev, "Invalid size for indirect buffer table"); 1108 goto err; 1109 } 1110 1111 /* If we've got too many, that implies a descriptor loop. */ 1112 if (num_bufs >= max) { 1113 virtio_error(vdev, "Looped descriptor"); 1114 goto err; 1115 } 1116 1117 /* loop over the indirect descriptor table */ 1118 len = address_space_cache_init(&indirect_desc_cache, 1119 vdev->dma_as, 1120 desc.addr, desc.len, false); 1121 desc_cache = &indirect_desc_cache; 1122 if (len < desc.len) { 1123 virtio_error(vdev, "Cannot map indirect buffer"); 1124 goto err; 1125 } 1126 1127 max = desc.len / sizeof(VRingPackedDesc); 1128 num_bufs = i = 0; 1129 vring_packed_desc_read(vdev, &desc, desc_cache, i, false); 1130 } 1131 1132 do { 1133 /* If we've got too many, that implies a descriptor loop. 
*/ 1134 if (++num_bufs > max) { 1135 virtio_error(vdev, "Looped descriptor"); 1136 goto err; 1137 } 1138 1139 if (desc.flags & VRING_DESC_F_WRITE) { 1140 in_total += desc.len; 1141 } else { 1142 out_total += desc.len; 1143 } 1144 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 1145 goto done; 1146 } 1147 1148 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, 1149 &i, desc_cache == 1150 &indirect_desc_cache); 1151 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1152 1153 if (desc_cache == &indirect_desc_cache) { 1154 address_space_cache_destroy(&indirect_desc_cache); 1155 total_bufs++; 1156 idx++; 1157 } else { 1158 idx += num_bufs - total_bufs; 1159 total_bufs = num_bufs; 1160 } 1161 1162 if (idx >= vq->vring.num) { 1163 idx -= vq->vring.num; 1164 wrap_counter ^= 1; 1165 } 1166 } 1167 1168 /* Record the index and wrap counter for a kick we want */ 1169 vq->shadow_avail_idx = idx; 1170 vq->shadow_avail_wrap_counter = wrap_counter; 1171 done: 1172 address_space_cache_destroy(&indirect_desc_cache); 1173 if (in_bytes) { 1174 *in_bytes = in_total; 1175 } 1176 if (out_bytes) { 1177 *out_bytes = out_total; 1178 } 1179 rcu_read_unlock(); 1180 return; 1181 1182 err: 1183 in_total = out_total = 0; 1184 goto done; 1185 } 1186 1187 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, 1188 unsigned int *out_bytes, 1189 unsigned max_in_bytes, unsigned max_out_bytes) 1190 { 1191 uint16_t desc_size; 1192 VRingMemoryRegionCaches *caches; 1193 1194 if (unlikely(!vq->vring.desc)) { 1195 goto err; 1196 } 1197 1198 caches = vring_get_region_caches(vq); 1199 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 1200 sizeof(VRingPackedDesc) : sizeof(VRingDesc); 1201 if (caches->desc.len < vq->vring.num * desc_size) { 1202 virtio_error(vq->vdev, "Cannot map descriptor ring"); 1203 goto err; 1204 } 1205 1206 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 1207 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, 1208 max_in_bytes, max_out_bytes); 1209 } else { 1210 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, 1211 max_in_bytes, max_out_bytes); 1212 } 1213 1214 return; 1215 err: 1216 if (in_bytes) { 1217 *in_bytes = 0; 1218 } 1219 if (out_bytes) { 1220 *out_bytes = 0; 1221 } 1222 } 1223 1224 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, 1225 unsigned int out_bytes) 1226 { 1227 unsigned int in_total, out_total; 1228 1229 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); 1230 return in_bytes <= in_total && out_bytes <= out_total; 1231 } 1232 1233 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, 1234 hwaddr *addr, struct iovec *iov, 1235 unsigned int max_num_sg, bool is_write, 1236 hwaddr pa, size_t sz) 1237 { 1238 bool ok = false; 1239 unsigned num_sg = *p_num_sg; 1240 assert(num_sg <= max_num_sg); 1241 1242 if (!sz) { 1243 virtio_error(vdev, "virtio: zero sized buffers are not allowed"); 1244 goto out; 1245 } 1246 1247 while (sz) { 1248 hwaddr len = sz; 1249 1250 if (num_sg == max_num_sg) { 1251 virtio_error(vdev, "virtio: too many write descriptors in " 1252 "indirect table"); 1253 goto out; 1254 } 1255 1256 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len, 1257 is_write ? 
1258 DMA_DIRECTION_FROM_DEVICE : 1259 DMA_DIRECTION_TO_DEVICE); 1260 if (!iov[num_sg].iov_base) { 1261 virtio_error(vdev, "virtio: bogus descriptor or out of resources"); 1262 goto out; 1263 } 1264 1265 iov[num_sg].iov_len = len; 1266 addr[num_sg] = pa; 1267 1268 sz -= len; 1269 pa += len; 1270 num_sg++; 1271 } 1272 ok = true; 1273 1274 out: 1275 *p_num_sg = num_sg; 1276 return ok; 1277 } 1278 1279 /* Only used by error code paths before we have a VirtQueueElement (therefore 1280 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to 1281 * yet. 1282 */ 1283 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, 1284 struct iovec *iov) 1285 { 1286 unsigned int i; 1287 1288 for (i = 0; i < out_num + in_num; i++) { 1289 int is_write = i >= out_num; 1290 1291 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0); 1292 iov++; 1293 } 1294 } 1295 1296 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, 1297 hwaddr *addr, unsigned int num_sg, 1298 int is_write) 1299 { 1300 unsigned int i; 1301 hwaddr len; 1302 1303 for (i = 0; i < num_sg; i++) { 1304 len = sg[i].iov_len; 1305 sg[i].iov_base = dma_memory_map(vdev->dma_as, 1306 addr[i], &len, is_write ? 1307 DMA_DIRECTION_FROM_DEVICE : 1308 DMA_DIRECTION_TO_DEVICE); 1309 if (!sg[i].iov_base) { 1310 error_report("virtio: error trying to map MMIO memory"); 1311 exit(1); 1312 } 1313 if (len != sg[i].iov_len) { 1314 error_report("virtio: unexpected memory split"); 1315 exit(1); 1316 } 1317 } 1318 } 1319 1320 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem) 1321 { 1322 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, 1); 1323 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 0); 1324 } 1325 1326 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) 1327 { 1328 VirtQueueElement *elem; 1329 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); 1330 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); 1331 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); 1332 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); 1333 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); 1334 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); 1335 1336 assert(sz >= sizeof(VirtQueueElement)); 1337 elem = g_malloc(out_sg_end); 1338 trace_virtqueue_alloc_element(elem, sz, in_num, out_num); 1339 elem->out_num = out_num; 1340 elem->in_num = in_num; 1341 elem->in_addr = (void *)elem + in_addr_ofs; 1342 elem->out_addr = (void *)elem + out_addr_ofs; 1343 elem->in_sg = (void *)elem + in_sg_ofs; 1344 elem->out_sg = (void *)elem + out_sg_ofs; 1345 return elem; 1346 } 1347 1348 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) 1349 { 1350 unsigned int i, head, max; 1351 VRingMemoryRegionCaches *caches; 1352 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1353 MemoryRegionCache *desc_cache; 1354 int64_t len; 1355 VirtIODevice *vdev = vq->vdev; 1356 VirtQueueElement *elem = NULL; 1357 unsigned out_num, in_num, elem_entries; 1358 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 1359 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 1360 VRingDesc desc; 1361 int rc; 1362 1363 rcu_read_lock(); 1364 if (virtio_queue_empty_rcu(vq)) { 1365 goto done; 1366 } 1367 /* Needed after virtio_queue_empty(), see comment in 1368 * virtqueue_num_heads(). 
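     * It orders the avail idx load in virtio_queue_empty_rcu() before the
     * avail ring and descriptor loads below.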
*/ 1369 smp_rmb(); 1370 1371 /* When we start there are none of either input nor output. */ 1372 out_num = in_num = elem_entries = 0; 1373 1374 max = vq->vring.num; 1375 1376 if (vq->inuse >= vq->vring.num) { 1377 virtio_error(vdev, "Virtqueue size exceeded"); 1378 goto done; 1379 } 1380 1381 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { 1382 goto done; 1383 } 1384 1385 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 1386 vring_set_avail_event(vq, vq->last_avail_idx); 1387 } 1388 1389 i = head; 1390 1391 caches = vring_get_region_caches(vq); 1392 if (caches->desc.len < max * sizeof(VRingDesc)) { 1393 virtio_error(vdev, "Cannot map descriptor ring"); 1394 goto done; 1395 } 1396 1397 desc_cache = &caches->desc; 1398 vring_split_desc_read(vdev, &desc, desc_cache, i); 1399 if (desc.flags & VRING_DESC_F_INDIRECT) { 1400 if (!desc.len || (desc.len % sizeof(VRingDesc))) { 1401 virtio_error(vdev, "Invalid size for indirect buffer table"); 1402 goto done; 1403 } 1404 1405 /* loop over the indirect descriptor table */ 1406 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, 1407 desc.addr, desc.len, false); 1408 desc_cache = &indirect_desc_cache; 1409 if (len < desc.len) { 1410 virtio_error(vdev, "Cannot map indirect buffer"); 1411 goto done; 1412 } 1413 1414 max = desc.len / sizeof(VRingDesc); 1415 i = 0; 1416 vring_split_desc_read(vdev, &desc, desc_cache, i); 1417 } 1418 1419 /* Collect all the descriptors */ 1420 do { 1421 bool map_ok; 1422 1423 if (desc.flags & VRING_DESC_F_WRITE) { 1424 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, 1425 iov + out_num, 1426 VIRTQUEUE_MAX_SIZE - out_num, true, 1427 desc.addr, desc.len); 1428 } else { 1429 if (in_num) { 1430 virtio_error(vdev, "Incorrect order for descriptors"); 1431 goto err_undo_map; 1432 } 1433 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, 1434 VIRTQUEUE_MAX_SIZE, false, 1435 desc.addr, desc.len); 1436 } 1437 if (!map_ok) { 1438 goto err_undo_map; 1439 } 1440 1441 /* If we've got too many, that implies a descriptor loop. 
*/ 1442 if (++elem_entries > max) { 1443 virtio_error(vdev, "Looped descriptor"); 1444 goto err_undo_map; 1445 } 1446 1447 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); 1448 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1449 1450 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 1451 goto err_undo_map; 1452 } 1453 1454 /* Now copy what we have collected and mapped */ 1455 elem = virtqueue_alloc_element(sz, out_num, in_num); 1456 elem->index = head; 1457 elem->ndescs = 1; 1458 for (i = 0; i < out_num; i++) { 1459 elem->out_addr[i] = addr[i]; 1460 elem->out_sg[i] = iov[i]; 1461 } 1462 for (i = 0; i < in_num; i++) { 1463 elem->in_addr[i] = addr[out_num + i]; 1464 elem->in_sg[i] = iov[out_num + i]; 1465 } 1466 1467 vq->inuse++; 1468 1469 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 1470 done: 1471 address_space_cache_destroy(&indirect_desc_cache); 1472 rcu_read_unlock(); 1473 1474 return elem; 1475 1476 err_undo_map: 1477 virtqueue_undo_map_desc(out_num, in_num, iov); 1478 goto done; 1479 } 1480 1481 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) 1482 { 1483 unsigned int i, max; 1484 VRingMemoryRegionCaches *caches; 1485 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; 1486 MemoryRegionCache *desc_cache; 1487 int64_t len; 1488 VirtIODevice *vdev = vq->vdev; 1489 VirtQueueElement *elem = NULL; 1490 unsigned out_num, in_num, elem_entries; 1491 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 1492 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 1493 VRingPackedDesc desc; 1494 uint16_t id; 1495 int rc; 1496 1497 rcu_read_lock(); 1498 if (virtio_queue_packed_empty_rcu(vq)) { 1499 goto done; 1500 } 1501 1502 /* When we start there are none of either input nor output. */ 1503 out_num = in_num = elem_entries = 0; 1504 1505 max = vq->vring.num; 1506 1507 if (vq->inuse >= vq->vring.num) { 1508 virtio_error(vdev, "Virtqueue size exceeded"); 1509 goto done; 1510 } 1511 1512 i = vq->last_avail_idx; 1513 1514 caches = vring_get_region_caches(vq); 1515 if (caches->desc.len < max * sizeof(VRingDesc)) { 1516 virtio_error(vdev, "Cannot map descriptor ring"); 1517 goto done; 1518 } 1519 1520 desc_cache = &caches->desc; 1521 vring_packed_desc_read(vdev, &desc, desc_cache, i, true); 1522 id = desc.id; 1523 if (desc.flags & VRING_DESC_F_INDIRECT) { 1524 if (desc.len % sizeof(VRingPackedDesc)) { 1525 virtio_error(vdev, "Invalid size for indirect buffer table"); 1526 goto done; 1527 } 1528 1529 /* loop over the indirect descriptor table */ 1530 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, 1531 desc.addr, desc.len, false); 1532 desc_cache = &indirect_desc_cache; 1533 if (len < desc.len) { 1534 virtio_error(vdev, "Cannot map indirect buffer"); 1535 goto done; 1536 } 1537 1538 max = desc.len / sizeof(VRingPackedDesc); 1539 i = 0; 1540 vring_packed_desc_read(vdev, &desc, desc_cache, i, false); 1541 } 1542 1543 /* Collect all the descriptors */ 1544 do { 1545 bool map_ok; 1546 1547 if (desc.flags & VRING_DESC_F_WRITE) { 1548 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, 1549 iov + out_num, 1550 VIRTQUEUE_MAX_SIZE - out_num, true, 1551 desc.addr, desc.len); 1552 } else { 1553 if (in_num) { 1554 virtio_error(vdev, "Incorrect order for descriptors"); 1555 goto err_undo_map; 1556 } 1557 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, 1558 VIRTQUEUE_MAX_SIZE, false, 1559 desc.addr, desc.len); 1560 } 1561 if (!map_ok) { 1562 goto err_undo_map; 1563 } 1564 1565 /* If we've got too many, that implies a descriptor loop. 
*/ 1566 if (++elem_entries > max) { 1567 virtio_error(vdev, "Looped descriptor"); 1568 goto err_undo_map; 1569 } 1570 1571 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i, 1572 desc_cache == 1573 &indirect_desc_cache); 1574 } while (rc == VIRTQUEUE_READ_DESC_MORE); 1575 1576 /* Now copy what we have collected and mapped */ 1577 elem = virtqueue_alloc_element(sz, out_num, in_num); 1578 for (i = 0; i < out_num; i++) { 1579 elem->out_addr[i] = addr[i]; 1580 elem->out_sg[i] = iov[i]; 1581 } 1582 for (i = 0; i < in_num; i++) { 1583 elem->in_addr[i] = addr[out_num + i]; 1584 elem->in_sg[i] = iov[out_num + i]; 1585 } 1586 1587 elem->index = id; 1588 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; 1589 vq->last_avail_idx += elem->ndescs; 1590 vq->inuse += elem->ndescs; 1591 1592 if (vq->last_avail_idx >= vq->vring.num) { 1593 vq->last_avail_idx -= vq->vring.num; 1594 vq->last_avail_wrap_counter ^= 1; 1595 } 1596 1597 vq->shadow_avail_idx = vq->last_avail_idx; 1598 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter; 1599 1600 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 1601 done: 1602 address_space_cache_destroy(&indirect_desc_cache); 1603 rcu_read_unlock(); 1604 1605 return elem; 1606 1607 err_undo_map: 1608 virtqueue_undo_map_desc(out_num, in_num, iov); 1609 goto done; 1610 } 1611 1612 void *virtqueue_pop(VirtQueue *vq, size_t sz) 1613 { 1614 if (unlikely(vq->vdev->broken)) { 1615 return NULL; 1616 } 1617 1618 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { 1619 return virtqueue_packed_pop(vq, sz); 1620 } else { 1621 return virtqueue_split_pop(vq, sz); 1622 } 1623 } 1624 1625 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) 1626 { 1627 VRingMemoryRegionCaches *caches; 1628 MemoryRegionCache *desc_cache; 1629 unsigned int dropped = 0; 1630 VirtQueueElement elem = {}; 1631 VirtIODevice *vdev = vq->vdev; 1632 VRingPackedDesc desc; 1633 1634 caches = vring_get_region_caches(vq); 1635 desc_cache = &caches->desc; 1636 1637 virtio_queue_set_notification(vq, 0); 1638 1639 while (vq->inuse < vq->vring.num) { 1640 unsigned int idx = vq->last_avail_idx; 1641 /* 1642 * works similar to virtqueue_pop but does not map buffers 1643 * and does not allocate any memory. 1644 */ 1645 vring_packed_desc_read(vdev, &desc, desc_cache, 1646 vq->last_avail_idx , true); 1647 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) { 1648 break; 1649 } 1650 elem.index = desc.id; 1651 elem.ndescs = 1; 1652 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache, 1653 vq->vring.num, &idx, false)) { 1654 ++elem.ndescs; 1655 } 1656 /* 1657 * immediately push the element, nothing to unmap 1658 * as both in_num and out_num are set to 0. 
1659 */ 1660 virtqueue_push(vq, &elem, 0); 1661 dropped++; 1662 vq->last_avail_idx += elem.ndescs; 1663 if (vq->last_avail_idx >= vq->vring.num) { 1664 vq->last_avail_idx -= vq->vring.num; 1665 vq->last_avail_wrap_counter ^= 1; 1666 } 1667 } 1668 1669 return dropped; 1670 } 1671 1672 static unsigned int virtqueue_split_drop_all(VirtQueue *vq) 1673 { 1674 unsigned int dropped = 0; 1675 VirtQueueElement elem = {}; 1676 VirtIODevice *vdev = vq->vdev; 1677 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1678 1679 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) { 1680 /* works similar to virtqueue_pop but does not map buffers 1681 * and does not allocate any memory */ 1682 smp_rmb(); 1683 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) { 1684 break; 1685 } 1686 vq->inuse++; 1687 vq->last_avail_idx++; 1688 if (fEventIdx) { 1689 vring_set_avail_event(vq, vq->last_avail_idx); 1690 } 1691 /* immediately push the element, nothing to unmap 1692 * as both in_num and out_num are set to 0 */ 1693 virtqueue_push(vq, &elem, 0); 1694 dropped++; 1695 } 1696 1697 return dropped; 1698 } 1699 1700 /* virtqueue_drop_all: 1701 * @vq: The #VirtQueue 1702 * Drops all queued buffers and indicates them to the guest 1703 * as if they are done. Useful when buffers can not be 1704 * processed but must be returned to the guest. 1705 */ 1706 unsigned int virtqueue_drop_all(VirtQueue *vq) 1707 { 1708 struct VirtIODevice *vdev = vq->vdev; 1709 1710 if (unlikely(vdev->broken)) { 1711 return 0; 1712 } 1713 1714 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1715 return virtqueue_packed_drop_all(vq); 1716 } else { 1717 return virtqueue_split_drop_all(vq); 1718 } 1719 } 1720 1721 /* Reading and writing a structure directly to QEMUFile is *awful*, but 1722 * it is what QEMU has always done by mistake. We can change it sooner 1723 * or later by bumping the version number of the affected vm states. 1724 * In the meanwhile, since the in-memory layout of VirtQueueElement 1725 * has changed, we need to marshal to and from the layout that was 1726 * used before the change. 1727 */ 1728 typedef struct VirtQueueElementOld { 1729 unsigned int index; 1730 unsigned int out_num; 1731 unsigned int in_num; 1732 hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; 1733 hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; 1734 struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; 1735 struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; 1736 } VirtQueueElementOld; 1737 1738 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz) 1739 { 1740 VirtQueueElement *elem; 1741 VirtQueueElementOld data; 1742 int i; 1743 1744 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 1745 1746 /* TODO: teach all callers that this can fail, and return failure instead 1747 * of asserting here. 1748 * This is just one thing (there are probably more) that must be 1749 * fixed before we can allow NDEBUG compilation. 1750 */ 1751 assert(ARRAY_SIZE(data.in_addr) >= data.in_num); 1752 assert(ARRAY_SIZE(data.out_addr) >= data.out_num); 1753 1754 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); 1755 elem->index = data.index; 1756 1757 for (i = 0; i < elem->in_num; i++) { 1758 elem->in_addr[i] = data.in_addr[i]; 1759 } 1760 1761 for (i = 0; i < elem->out_num; i++) { 1762 elem->out_addr[i] = data.out_addr[i]; 1763 } 1764 1765 for (i = 0; i < elem->in_num; i++) { 1766 /* Base is overwritten by virtqueue_map. 
*/ 1767 elem->in_sg[i].iov_base = 0; 1768 elem->in_sg[i].iov_len = data.in_sg[i].iov_len; 1769 } 1770 1771 for (i = 0; i < elem->out_num; i++) { 1772 /* Base is overwritten by virtqueue_map. */ 1773 elem->out_sg[i].iov_base = 0; 1774 elem->out_sg[i].iov_len = data.out_sg[i].iov_len; 1775 } 1776 1777 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1778 qemu_get_be32s(f, &elem->ndescs); 1779 } 1780 1781 virtqueue_map(vdev, elem); 1782 return elem; 1783 } 1784 1785 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, 1786 VirtQueueElement *elem) 1787 { 1788 VirtQueueElementOld data; 1789 int i; 1790 1791 memset(&data, 0, sizeof(data)); 1792 data.index = elem->index; 1793 data.in_num = elem->in_num; 1794 data.out_num = elem->out_num; 1795 1796 for (i = 0; i < elem->in_num; i++) { 1797 data.in_addr[i] = elem->in_addr[i]; 1798 } 1799 1800 for (i = 0; i < elem->out_num; i++) { 1801 data.out_addr[i] = elem->out_addr[i]; 1802 } 1803 1804 for (i = 0; i < elem->in_num; i++) { 1805 /* Base is overwritten by virtqueue_map when loading. Do not 1806 * save it, as it would leak the QEMU address space layout. */ 1807 data.in_sg[i].iov_len = elem->in_sg[i].iov_len; 1808 } 1809 1810 for (i = 0; i < elem->out_num; i++) { 1811 /* Do not save iov_base as above. */ 1812 data.out_sg[i].iov_len = elem->out_sg[i].iov_len; 1813 } 1814 1815 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 1816 qemu_put_be32s(f, &elem->ndescs); 1817 } 1818 1819 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 1820 } 1821 1822 /* virtio device */ 1823 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) 1824 { 1825 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1826 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1827 1828 if (unlikely(vdev->broken)) { 1829 return; 1830 } 1831 1832 if (k->notify) { 1833 k->notify(qbus->parent, vector); 1834 } 1835 } 1836 1837 void virtio_update_irq(VirtIODevice *vdev) 1838 { 1839 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 1840 } 1841 1842 static int virtio_validate_features(VirtIODevice *vdev) 1843 { 1844 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1845 1846 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) && 1847 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { 1848 return -EFAULT; 1849 } 1850 1851 if (k->validate_features) { 1852 return k->validate_features(vdev); 1853 } else { 1854 return 0; 1855 } 1856 } 1857 1858 int virtio_set_status(VirtIODevice *vdev, uint8_t val) 1859 { 1860 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1861 trace_virtio_set_status(vdev, val); 1862 1863 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1864 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && 1865 val & VIRTIO_CONFIG_S_FEATURES_OK) { 1866 int ret = virtio_validate_features(vdev); 1867 1868 if (ret) { 1869 return ret; 1870 } 1871 } 1872 } 1873 1874 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 1875 (val & VIRTIO_CONFIG_S_DRIVER_OK)) { 1876 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); 1877 } 1878 1879 if (k->set_status) { 1880 k->set_status(vdev, val); 1881 } 1882 vdev->status = val; 1883 1884 return 0; 1885 } 1886 1887 static enum virtio_device_endian virtio_default_endian(void) 1888 { 1889 if (target_words_bigendian()) { 1890 return VIRTIO_DEVICE_ENDIAN_BIG; 1891 } else { 1892 return VIRTIO_DEVICE_ENDIAN_LITTLE; 1893 } 1894 } 1895 1896 static enum virtio_device_endian virtio_current_cpu_endian(void) 1897 { 1898 CPUClass *cc = CPU_GET_CLASS(current_cpu); 1899 1900 if 
(cc->virtio_is_big_endian(current_cpu)) { 1901 return VIRTIO_DEVICE_ENDIAN_BIG; 1902 } else { 1903 return VIRTIO_DEVICE_ENDIAN_LITTLE; 1904 } 1905 } 1906 1907 void virtio_reset(void *opaque) 1908 { 1909 VirtIODevice *vdev = opaque; 1910 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1911 int i; 1912 1913 virtio_set_status(vdev, 0); 1914 if (current_cpu) { 1915 /* Guest initiated reset */ 1916 vdev->device_endian = virtio_current_cpu_endian(); 1917 } else { 1918 /* System reset */ 1919 vdev->device_endian = virtio_default_endian(); 1920 } 1921 1922 if (k->reset) { 1923 k->reset(vdev); 1924 } 1925 1926 vdev->start_on_kick = false; 1927 vdev->started = false; 1928 vdev->broken = false; 1929 vdev->guest_features = 0; 1930 vdev->queue_sel = 0; 1931 vdev->status = 0; 1932 atomic_set(&vdev->isr, 0); 1933 vdev->config_vector = VIRTIO_NO_VECTOR; 1934 virtio_notify_vector(vdev, vdev->config_vector); 1935 1936 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1937 vdev->vq[i].vring.desc = 0; 1938 vdev->vq[i].vring.avail = 0; 1939 vdev->vq[i].vring.used = 0; 1940 vdev->vq[i].last_avail_idx = 0; 1941 vdev->vq[i].shadow_avail_idx = 0; 1942 vdev->vq[i].used_idx = 0; 1943 vdev->vq[i].last_avail_wrap_counter = true; 1944 vdev->vq[i].shadow_avail_wrap_counter = true; 1945 vdev->vq[i].used_wrap_counter = true; 1946 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); 1947 vdev->vq[i].signalled_used = 0; 1948 vdev->vq[i].signalled_used_valid = false; 1949 vdev->vq[i].notification = true; 1950 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; 1951 vdev->vq[i].inuse = 0; 1952 virtio_virtqueue_reset_region_cache(&vdev->vq[i]); 1953 } 1954 } 1955 1956 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) 1957 { 1958 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1959 uint8_t val; 1960 1961 if (addr + sizeof(val) > vdev->config_len) { 1962 return (uint32_t)-1; 1963 } 1964 1965 k->get_config(vdev, vdev->config); 1966 1967 val = ldub_p(vdev->config + addr); 1968 return val; 1969 } 1970 1971 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) 1972 { 1973 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1974 uint16_t val; 1975 1976 if (addr + sizeof(val) > vdev->config_len) { 1977 return (uint32_t)-1; 1978 } 1979 1980 k->get_config(vdev, vdev->config); 1981 1982 val = lduw_p(vdev->config + addr); 1983 return val; 1984 } 1985 1986 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) 1987 { 1988 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1989 uint32_t val; 1990 1991 if (addr + sizeof(val) > vdev->config_len) { 1992 return (uint32_t)-1; 1993 } 1994 1995 k->get_config(vdev, vdev->config); 1996 1997 val = ldl_p(vdev->config + addr); 1998 return val; 1999 } 2000 2001 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) 2002 { 2003 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2004 uint8_t val = data; 2005 2006 if (addr + sizeof(val) > vdev->config_len) { 2007 return; 2008 } 2009 2010 stb_p(vdev->config + addr, val); 2011 2012 if (k->set_config) { 2013 k->set_config(vdev, vdev->config); 2014 } 2015 } 2016 2017 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) 2018 { 2019 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2020 uint16_t val = data; 2021 2022 if (addr + sizeof(val) > vdev->config_len) { 2023 return; 2024 } 2025 2026 stw_p(vdev->config + addr, val); 2027 2028 if (k->set_config) { 2029 k->set_config(vdev, vdev->config); 2030 } 2031 } 2032 2033 void virtio_config_writel(VirtIODevice *vdev, uint32_t 
addr, uint32_t data) 2034 { 2035 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2036 uint32_t val = data; 2037 2038 if (addr + sizeof(val) > vdev->config_len) { 2039 return; 2040 } 2041 2042 stl_p(vdev->config + addr, val); 2043 2044 if (k->set_config) { 2045 k->set_config(vdev, vdev->config); 2046 } 2047 } 2048 2049 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr) 2050 { 2051 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2052 uint8_t val; 2053 2054 if (addr + sizeof(val) > vdev->config_len) { 2055 return (uint32_t)-1; 2056 } 2057 2058 k->get_config(vdev, vdev->config); 2059 2060 val = ldub_p(vdev->config + addr); 2061 return val; 2062 } 2063 2064 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr) 2065 { 2066 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2067 uint16_t val; 2068 2069 if (addr + sizeof(val) > vdev->config_len) { 2070 return (uint32_t)-1; 2071 } 2072 2073 k->get_config(vdev, vdev->config); 2074 2075 val = lduw_le_p(vdev->config + addr); 2076 return val; 2077 } 2078 2079 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr) 2080 { 2081 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2082 uint32_t val; 2083 2084 if (addr + sizeof(val) > vdev->config_len) { 2085 return (uint32_t)-1; 2086 } 2087 2088 k->get_config(vdev, vdev->config); 2089 2090 val = ldl_le_p(vdev->config + addr); 2091 return val; 2092 } 2093 2094 void virtio_config_modern_writeb(VirtIODevice *vdev, 2095 uint32_t addr, uint32_t data) 2096 { 2097 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2098 uint8_t val = data; 2099 2100 if (addr + sizeof(val) > vdev->config_len) { 2101 return; 2102 } 2103 2104 stb_p(vdev->config + addr, val); 2105 2106 if (k->set_config) { 2107 k->set_config(vdev, vdev->config); 2108 } 2109 } 2110 2111 void virtio_config_modern_writew(VirtIODevice *vdev, 2112 uint32_t addr, uint32_t data) 2113 { 2114 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2115 uint16_t val = data; 2116 2117 if (addr + sizeof(val) > vdev->config_len) { 2118 return; 2119 } 2120 2121 stw_le_p(vdev->config + addr, val); 2122 2123 if (k->set_config) { 2124 k->set_config(vdev, vdev->config); 2125 } 2126 } 2127 2128 void virtio_config_modern_writel(VirtIODevice *vdev, 2129 uint32_t addr, uint32_t data) 2130 { 2131 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2132 uint32_t val = data; 2133 2134 if (addr + sizeof(val) > vdev->config_len) { 2135 return; 2136 } 2137 2138 stl_le_p(vdev->config + addr, val); 2139 2140 if (k->set_config) { 2141 k->set_config(vdev, vdev->config); 2142 } 2143 } 2144 2145 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) 2146 { 2147 if (!vdev->vq[n].vring.num) { 2148 return; 2149 } 2150 vdev->vq[n].vring.desc = addr; 2151 virtio_queue_update_rings(vdev, n); 2152 } 2153 2154 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) 2155 { 2156 return vdev->vq[n].vring.desc; 2157 } 2158 2159 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, 2160 hwaddr avail, hwaddr used) 2161 { 2162 if (!vdev->vq[n].vring.num) { 2163 return; 2164 } 2165 vdev->vq[n].vring.desc = desc; 2166 vdev->vq[n].vring.avail = avail; 2167 vdev->vq[n].vring.used = used; 2168 virtio_init_region_cache(vdev, n); 2169 } 2170 2171 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) 2172 { 2173 /* Don't allow guest to flip queue between existent and 2174 * nonexistent states, or to set it to an invalid size. 
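 * In other words, the guest may only resize a queue the transport has
 * already created (vring.num != 0); it cannot create or destroy queues
 * here, and out-of-range sizes are silently ignored.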
2175 */ 2176 if (!!num != !!vdev->vq[n].vring.num || 2177 num > VIRTQUEUE_MAX_SIZE || 2178 num < 0) { 2179 return; 2180 } 2181 vdev->vq[n].vring.num = num; 2182 } 2183 2184 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector) 2185 { 2186 return QLIST_FIRST(&vdev->vector_queues[vector]); 2187 } 2188 2189 VirtQueue *virtio_vector_next_queue(VirtQueue *vq) 2190 { 2191 return QLIST_NEXT(vq, node); 2192 } 2193 2194 int virtio_queue_get_num(VirtIODevice *vdev, int n) 2195 { 2196 return vdev->vq[n].vring.num; 2197 } 2198 2199 int virtio_queue_get_max_num(VirtIODevice *vdev, int n) 2200 { 2201 return vdev->vq[n].vring.num_default; 2202 } 2203 2204 int virtio_get_num_queues(VirtIODevice *vdev) 2205 { 2206 int i; 2207 2208 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2209 if (!virtio_queue_get_num(vdev, i)) { 2210 break; 2211 } 2212 } 2213 2214 return i; 2215 } 2216 2217 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) 2218 { 2219 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2220 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2221 2222 /* virtio-1 compliant devices cannot change the alignment */ 2223 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2224 error_report("tried to modify queue alignment for virtio-1 device"); 2225 return; 2226 } 2227 /* Check that the transport told us it was going to do this 2228 * (so a buggy transport will immediately assert rather than 2229 * silently failing to migrate this state) 2230 */ 2231 assert(k->has_variable_vring_alignment); 2232 2233 if (align) { 2234 vdev->vq[n].vring.align = align; 2235 virtio_queue_update_rings(vdev, n); 2236 } 2237 } 2238 2239 static bool virtio_queue_notify_aio_vq(VirtQueue *vq) 2240 { 2241 bool ret = false; 2242 2243 if (vq->vring.desc && vq->handle_aio_output) { 2244 VirtIODevice *vdev = vq->vdev; 2245 2246 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 2247 ret = vq->handle_aio_output(vdev, vq); 2248 2249 if (unlikely(vdev->start_on_kick)) { 2250 virtio_set_started(vdev, true); 2251 } 2252 } 2253 2254 return ret; 2255 } 2256 2257 static void virtio_queue_notify_vq(VirtQueue *vq) 2258 { 2259 if (vq->vring.desc && vq->handle_output) { 2260 VirtIODevice *vdev = vq->vdev; 2261 2262 if (unlikely(vdev->broken)) { 2263 return; 2264 } 2265 2266 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 2267 vq->handle_output(vdev, vq); 2268 2269 if (unlikely(vdev->start_on_kick)) { 2270 virtio_set_started(vdev, true); 2271 } 2272 } 2273 } 2274 2275 void virtio_queue_notify(VirtIODevice *vdev, int n) 2276 { 2277 VirtQueue *vq = &vdev->vq[n]; 2278 2279 if (unlikely(!vq->vring.desc || vdev->broken)) { 2280 return; 2281 } 2282 2283 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 2284 if (vq->handle_aio_output) { 2285 event_notifier_set(&vq->host_notifier); 2286 } else if (vq->handle_output) { 2287 vq->handle_output(vdev, vq); 2288 2289 if (unlikely(vdev->start_on_kick)) { 2290 virtio_set_started(vdev, true); 2291 } 2292 } 2293 } 2294 2295 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) 2296 { 2297 return n < VIRTIO_QUEUE_MAX ? 
vdev->vq[n].vector : 2298 VIRTIO_NO_VECTOR; 2299 } 2300 2301 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) 2302 { 2303 VirtQueue *vq = &vdev->vq[n]; 2304 2305 if (n < VIRTIO_QUEUE_MAX) { 2306 if (vdev->vector_queues && 2307 vdev->vq[n].vector != VIRTIO_NO_VECTOR) { 2308 QLIST_REMOVE(vq, node); 2309 } 2310 vdev->vq[n].vector = vector; 2311 if (vdev->vector_queues && 2312 vector != VIRTIO_NO_VECTOR) { 2313 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node); 2314 } 2315 } 2316 } 2317 2318 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, 2319 VirtIOHandleOutput handle_output) 2320 { 2321 int i; 2322 2323 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2324 if (vdev->vq[i].vring.num == 0) 2325 break; 2326 } 2327 2328 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) 2329 abort(); 2330 2331 vdev->vq[i].vring.num = queue_size; 2332 vdev->vq[i].vring.num_default = queue_size; 2333 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; 2334 vdev->vq[i].handle_output = handle_output; 2335 vdev->vq[i].handle_aio_output = NULL; 2336 vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) * 2337 queue_size); 2338 2339 return &vdev->vq[i]; 2340 } 2341 2342 void virtio_del_queue(VirtIODevice *vdev, int n) 2343 { 2344 if (n < 0 || n >= VIRTIO_QUEUE_MAX) { 2345 abort(); 2346 } 2347 2348 vdev->vq[n].vring.num = 0; 2349 vdev->vq[n].vring.num_default = 0; 2350 vdev->vq[n].handle_output = NULL; 2351 vdev->vq[n].handle_aio_output = NULL; 2352 g_free(vdev->vq[n].used_elems); 2353 } 2354 2355 static void virtio_set_isr(VirtIODevice *vdev, int value) 2356 { 2357 uint8_t old = atomic_read(&vdev->isr); 2358 2359 /* Do not write ISR if it does not change, so that its cacheline remains 2360 * shared in the common case where the guest does not read it. 2361 */ 2362 if ((old & value) != value) { 2363 atomic_or(&vdev->isr, value); 2364 } 2365 } 2366 2367 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2368 { 2369 uint16_t old, new; 2370 bool v; 2371 /* We need to expose used array entries before checking used event. 
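 * The full barrier orders our writes to the used ring (including used->idx)
 * before the reads of the guest's avail flags / used_event below; without
 * it we could suppress a notification the guest still expects.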
*/ 2372 smp_mb(); 2373 /* Always notify when the queue is empty (if the feature has been acknowledged) */ 2374 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && 2375 !vq->inuse && virtio_queue_empty(vq)) { 2376 return true; 2377 } 2378 2379 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 2380 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); 2381 } 2382 2383 v = vq->signalled_used_valid; 2384 vq->signalled_used_valid = true; 2385 old = vq->signalled_used; 2386 new = vq->signalled_used = vq->used_idx; 2387 return !v || vring_need_event(vring_get_used_event(vq), new, old); 2388 } 2389 2390 static bool vring_packed_need_event(VirtQueue *vq, bool wrap, 2391 uint16_t off_wrap, uint16_t new, 2392 uint16_t old) 2393 { 2394 int off = off_wrap & ~(1 << 15); 2395 2396 if (wrap != off_wrap >> 15) { 2397 off -= vq->vring.num; 2398 } 2399 2400 return vring_need_event(off, new, old); 2401 } 2402 2403 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2404 { 2405 VRingPackedDescEvent e; 2406 uint16_t old, new; 2407 bool v; 2408 VRingMemoryRegionCaches *caches; 2409 2410 caches = vring_get_region_caches(vq); 2411 vring_packed_event_read(vdev, &caches->avail, &e); 2412 2413 old = vq->signalled_used; 2414 new = vq->signalled_used = vq->used_idx; 2415 v = vq->signalled_used_valid; 2416 vq->signalled_used_valid = true; 2417 2418 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) { 2419 return false; 2420 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) { 2421 return true; 2422 } 2423 2424 return !v || vring_packed_need_event(vq, vq->used_wrap_counter, 2425 e.off_wrap, new, old); 2426 } 2427 2428 /* Called within rcu_read_lock(). */ 2429 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) 2430 { 2431 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 2432 return virtio_packed_should_notify(vdev, vq); 2433 } else { 2434 return virtio_split_should_notify(vdev, vq); 2435 } 2436 } 2437 2438 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) 2439 { 2440 bool should_notify; 2441 rcu_read_lock(); 2442 should_notify = virtio_should_notify(vdev, vq); 2443 rcu_read_unlock(); 2444 2445 if (!should_notify) { 2446 return; 2447 } 2448 2449 trace_virtio_notify_irqfd(vdev, vq); 2450 2451 /* 2452 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but 2453 * the Windows drivers included in virtio-win 1.8.0 (circa 2015) 2454 * incorrectly poll this bit during crashdump and hibernation 2455 * in MSI mode, causing a hang if this bit is never updated. 2456 * Recent releases of Windows do not really shut down, but rather 2457 * log out and hibernate to make the next startup faster. Hence, 2458 * with those drivers the bug also manifested as a more serious 2459 * hang during shutdown. 2460 * The next driver release, from 2016, fixed this problem, so working 2461 * around it is not a must, but it's easy to do, so let's do it here. 2462 * 2463 * Note: it's safe to update ISR from any thread as it was switched 2464 * to an atomic operation.
2465 */ 2466 virtio_set_isr(vq->vdev, 0x1); 2467 event_notifier_set(&vq->guest_notifier); 2468 } 2469 2470 static void virtio_irq(VirtQueue *vq) 2471 { 2472 virtio_set_isr(vq->vdev, 0x1); 2473 virtio_notify_vector(vq->vdev, vq->vector); 2474 } 2475 2476 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) 2477 { 2478 bool should_notify; 2479 rcu_read_lock(); 2480 should_notify = virtio_should_notify(vdev, vq); 2481 rcu_read_unlock(); 2482 2483 if (!should_notify) { 2484 return; 2485 } 2486 2487 trace_virtio_notify(vdev, vq); 2488 virtio_irq(vq); 2489 } 2490 2491 void virtio_notify_config(VirtIODevice *vdev) 2492 { 2493 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2494 return; 2495 2496 virtio_set_isr(vdev, 0x3); 2497 vdev->generation++; 2498 virtio_notify_vector(vdev, vdev->config_vector); 2499 } 2500 2501 static bool virtio_device_endian_needed(void *opaque) 2502 { 2503 VirtIODevice *vdev = opaque; 2504 2505 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); 2506 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2507 return vdev->device_endian != virtio_default_endian(); 2508 } 2509 /* Devices conforming to VIRTIO 1.0 or later are always LE. */ 2510 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; 2511 } 2512 2513 static bool virtio_64bit_features_needed(void *opaque) 2514 { 2515 VirtIODevice *vdev = opaque; 2516 2517 return (vdev->host_features >> 32) != 0; 2518 } 2519 2520 static bool virtio_virtqueue_needed(void *opaque) 2521 { 2522 VirtIODevice *vdev = opaque; 2523 2524 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); 2525 } 2526 2527 static bool virtio_packed_virtqueue_needed(void *opaque) 2528 { 2529 VirtIODevice *vdev = opaque; 2530 2531 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); 2532 } 2533 2534 static bool virtio_ringsize_needed(void *opaque) 2535 { 2536 VirtIODevice *vdev = opaque; 2537 int i; 2538 2539 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2540 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) { 2541 return true; 2542 } 2543 } 2544 return false; 2545 } 2546 2547 static bool virtio_extra_state_needed(void *opaque) 2548 { 2549 VirtIODevice *vdev = opaque; 2550 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2551 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2552 2553 return k->has_extra_state && 2554 k->has_extra_state(qbus->parent); 2555 } 2556 2557 static bool virtio_broken_needed(void *opaque) 2558 { 2559 VirtIODevice *vdev = opaque; 2560 2561 return vdev->broken; 2562 } 2563 2564 static bool virtio_started_needed(void *opaque) 2565 { 2566 VirtIODevice *vdev = opaque; 2567 2568 return vdev->started; 2569 } 2570 2571 static const VMStateDescription vmstate_virtqueue = { 2572 .name = "virtqueue_state", 2573 .version_id = 1, 2574 .minimum_version_id = 1, 2575 .fields = (VMStateField[]) { 2576 VMSTATE_UINT64(vring.avail, struct VirtQueue), 2577 VMSTATE_UINT64(vring.used, struct VirtQueue), 2578 VMSTATE_END_OF_LIST() 2579 } 2580 }; 2581 2582 static const VMStateDescription vmstate_packed_virtqueue = { 2583 .name = "packed_virtqueue_state", 2584 .version_id = 1, 2585 .minimum_version_id = 1, 2586 .fields = (VMStateField[]) { 2587 VMSTATE_UINT16(last_avail_idx, struct VirtQueue), 2588 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue), 2589 VMSTATE_UINT16(used_idx, struct VirtQueue), 2590 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue), 2591 VMSTATE_UINT32(inuse, struct VirtQueue), 2592 VMSTATE_END_OF_LIST() 2593 } 2594 }; 2595 2596 static const VMStateDescription vmstate_virtio_virtqueues = { 2597 
.name = "virtio/virtqueues", 2598 .version_id = 1, 2599 .minimum_version_id = 1, 2600 .needed = &virtio_virtqueue_needed, 2601 .fields = (VMStateField[]) { 2602 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2603 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), 2604 VMSTATE_END_OF_LIST() 2605 } 2606 }; 2607 2608 static const VMStateDescription vmstate_virtio_packed_virtqueues = { 2609 .name = "virtio/packed_virtqueues", 2610 .version_id = 1, 2611 .minimum_version_id = 1, 2612 .needed = &virtio_packed_virtqueue_needed, 2613 .fields = (VMStateField[]) { 2614 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2615 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), 2616 VMSTATE_END_OF_LIST() 2617 } 2618 }; 2619 2620 static const VMStateDescription vmstate_ringsize = { 2621 .name = "ringsize_state", 2622 .version_id = 1, 2623 .minimum_version_id = 1, 2624 .fields = (VMStateField[]) { 2625 VMSTATE_UINT32(vring.num_default, struct VirtQueue), 2626 VMSTATE_END_OF_LIST() 2627 } 2628 }; 2629 2630 static const VMStateDescription vmstate_virtio_ringsize = { 2631 .name = "virtio/ringsize", 2632 .version_id = 1, 2633 .minimum_version_id = 1, 2634 .needed = &virtio_ringsize_needed, 2635 .fields = (VMStateField[]) { 2636 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 2637 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), 2638 VMSTATE_END_OF_LIST() 2639 } 2640 }; 2641 2642 static int get_extra_state(QEMUFile *f, void *pv, size_t size, 2643 const VMStateField *field) 2644 { 2645 VirtIODevice *vdev = pv; 2646 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2647 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2648 2649 if (!k->load_extra_state) { 2650 return -1; 2651 } else { 2652 return k->load_extra_state(qbus->parent, f); 2653 } 2654 } 2655 2656 static int put_extra_state(QEMUFile *f, void *pv, size_t size, 2657 const VMStateField *field, QJSON *vmdesc) 2658 { 2659 VirtIODevice *vdev = pv; 2660 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2661 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2662 2663 k->save_extra_state(qbus->parent, f); 2664 return 0; 2665 } 2666 2667 static const VMStateInfo vmstate_info_extra_state = { 2668 .name = "virtqueue_extra_state", 2669 .get = get_extra_state, 2670 .put = put_extra_state, 2671 }; 2672 2673 static const VMStateDescription vmstate_virtio_extra_state = { 2674 .name = "virtio/extra_state", 2675 .version_id = 1, 2676 .minimum_version_id = 1, 2677 .needed = &virtio_extra_state_needed, 2678 .fields = (VMStateField[]) { 2679 { 2680 .name = "extra_state", 2681 .version_id = 0, 2682 .field_exists = NULL, 2683 .size = 0, 2684 .info = &vmstate_info_extra_state, 2685 .flags = VMS_SINGLE, 2686 .offset = 0, 2687 }, 2688 VMSTATE_END_OF_LIST() 2689 } 2690 }; 2691 2692 static const VMStateDescription vmstate_virtio_device_endian = { 2693 .name = "virtio/device_endian", 2694 .version_id = 1, 2695 .minimum_version_id = 1, 2696 .needed = &virtio_device_endian_needed, 2697 .fields = (VMStateField[]) { 2698 VMSTATE_UINT8(device_endian, VirtIODevice), 2699 VMSTATE_END_OF_LIST() 2700 } 2701 }; 2702 2703 static const VMStateDescription vmstate_virtio_64bit_features = { 2704 .name = "virtio/64bit_features", 2705 .version_id = 1, 2706 .minimum_version_id = 1, 2707 .needed = &virtio_64bit_features_needed, 2708 .fields = (VMStateField[]) { 2709 VMSTATE_UINT64(guest_features, VirtIODevice), 2710 VMSTATE_END_OF_LIST() 2711 } 2712 }; 2713 2714 static const VMStateDescription vmstate_virtio_broken = { 2715 .name = "virtio/broken", 2716 
.version_id = 1, 2717 .minimum_version_id = 1, 2718 .needed = &virtio_broken_needed, 2719 .fields = (VMStateField[]) { 2720 VMSTATE_BOOL(broken, VirtIODevice), 2721 VMSTATE_END_OF_LIST() 2722 } 2723 }; 2724 2725 static const VMStateDescription vmstate_virtio_started = { 2726 .name = "virtio/started", 2727 .version_id = 1, 2728 .minimum_version_id = 1, 2729 .needed = &virtio_started_needed, 2730 .fields = (VMStateField[]) { 2731 VMSTATE_BOOL(started, VirtIODevice), 2732 VMSTATE_END_OF_LIST() 2733 } 2734 }; 2735 2736 static const VMStateDescription vmstate_virtio = { 2737 .name = "virtio", 2738 .version_id = 1, 2739 .minimum_version_id = 1, 2740 .minimum_version_id_old = 1, 2741 .fields = (VMStateField[]) { 2742 VMSTATE_END_OF_LIST() 2743 }, 2744 .subsections = (const VMStateDescription*[]) { 2745 &vmstate_virtio_device_endian, 2746 &vmstate_virtio_64bit_features, 2747 &vmstate_virtio_virtqueues, 2748 &vmstate_virtio_ringsize, 2749 &vmstate_virtio_broken, 2750 &vmstate_virtio_extra_state, 2751 &vmstate_virtio_started, 2752 &vmstate_virtio_packed_virtqueues, 2753 NULL 2754 } 2755 }; 2756 2757 int virtio_save(VirtIODevice *vdev, QEMUFile *f) 2758 { 2759 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2760 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2761 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 2762 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); 2763 int i; 2764 2765 if (k->save_config) { 2766 k->save_config(qbus->parent, f); 2767 } 2768 2769 qemu_put_8s(f, &vdev->status); 2770 qemu_put_8s(f, &vdev->isr); 2771 qemu_put_be16s(f, &vdev->queue_sel); 2772 qemu_put_be32s(f, &guest_features_lo); 2773 qemu_put_be32(f, vdev->config_len); 2774 qemu_put_buffer(f, vdev->config, vdev->config_len); 2775 2776 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2777 if (vdev->vq[i].vring.num == 0) 2778 break; 2779 } 2780 2781 qemu_put_be32(f, i); 2782 2783 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2784 if (vdev->vq[i].vring.num == 0) 2785 break; 2786 2787 qemu_put_be32(f, vdev->vq[i].vring.num); 2788 if (k->has_variable_vring_alignment) { 2789 qemu_put_be32(f, vdev->vq[i].vring.align); 2790 } 2791 /* 2792 * Save desc now, the rest of the ring addresses are saved in 2793 * subsections for VIRTIO-1 devices. 
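 * (The avail and used ring addresses travel in the vmstate_virtio_virtqueues
 * subsection defined earlier in this file.)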
2794 */ 2795 qemu_put_be64(f, vdev->vq[i].vring.desc); 2796 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); 2797 if (k->save_queue) { 2798 k->save_queue(qbus->parent, i, f); 2799 } 2800 } 2801 2802 if (vdc->save != NULL) { 2803 vdc->save(vdev, f); 2804 } 2805 2806 if (vdc->vmsd) { 2807 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL); 2808 if (ret) { 2809 return ret; 2810 } 2811 } 2812 2813 /* Subsections */ 2814 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL); 2815 } 2816 2817 /* A wrapper for use as a VMState .put function */ 2818 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, 2819 const VMStateField *field, QJSON *vmdesc) 2820 { 2821 return virtio_save(VIRTIO_DEVICE(opaque), f); 2822 } 2823 2824 /* A wrapper for use as a VMState .get function */ 2825 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size, 2826 const VMStateField *field) 2827 { 2828 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 2829 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); 2830 2831 return virtio_load(vdev, f, dc->vmsd->version_id); 2832 } 2833 2834 const VMStateInfo virtio_vmstate_info = { 2835 .name = "virtio", 2836 .get = virtio_device_get, 2837 .put = virtio_device_put, 2838 }; 2839 2840 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) 2841 { 2842 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 2843 bool bad = (val & ~(vdev->host_features)) != 0; 2844 2845 val &= vdev->host_features; 2846 if (k->set_features) { 2847 k->set_features(vdev, val); 2848 } 2849 vdev->guest_features = val; 2850 return bad ? -1 : 0; 2851 } 2852 2853 int virtio_set_features(VirtIODevice *vdev, uint64_t val) 2854 { 2855 int ret; 2856 /* 2857 * The driver must not attempt to set features after feature negotiation 2858 * has finished. 2859 */ 2860 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { 2861 return -EINVAL; 2862 } 2863 ret = virtio_set_features_nocheck(vdev, val); 2864 if (!ret) { 2865 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 2866 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ 2867 int i; 2868 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2869 if (vdev->vq[i].vring.num != 0) { 2870 virtio_init_region_cache(vdev, i); 2871 } 2872 } 2873 } 2874 2875 if (!virtio_device_started(vdev, vdev->status) && 2876 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 2877 vdev->start_on_kick = true; 2878 } 2879 } 2880 return ret; 2881 } 2882 2883 size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes, 2884 uint64_t host_features) 2885 { 2886 size_t config_size = 0; 2887 int i; 2888 2889 for (i = 0; feature_sizes[i].flags != 0; i++) { 2890 if (host_features & feature_sizes[i].flags) { 2891 config_size = MAX(feature_sizes[i].end, config_size); 2892 } 2893 } 2894 2895 return config_size; 2896 } 2897 2898 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) 2899 { 2900 int i, ret; 2901 int32_t config_len; 2902 uint32_t num; 2903 uint32_t features; 2904 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 2905 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 2906 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 2907 2908 /* 2909 * We poison the endianness to ensure it does not get used before 2910 * subsections have been loaded. 
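 * If the stream carries no device_endian subsection, we fall back to
 * virtio_default_endian() once all subsections have been processed (see the
 * check further down).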
2911 */ 2912 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN; 2913 2914 if (k->load_config) { 2915 ret = k->load_config(qbus->parent, f); 2916 if (ret) 2917 return ret; 2918 } 2919 2920 qemu_get_8s(f, &vdev->status); 2921 qemu_get_8s(f, &vdev->isr); 2922 qemu_get_be16s(f, &vdev->queue_sel); 2923 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) { 2924 return -1; 2925 } 2926 qemu_get_be32s(f, &features); 2927 2928 /* 2929 * Temporarily set guest_features low bits - needed by 2930 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2931 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ. 2932 * 2933 * Note: devices should always test host features in future - don't create 2934 * new dependencies like this. 2935 */ 2936 vdev->guest_features = features; 2937 2938 config_len = qemu_get_be32(f); 2939 2940 /* 2941 * There are cases where the incoming config can be bigger or smaller 2942 * than what we have; so load what we have space for, and skip 2943 * any excess that's in the stream. 2944 */ 2945 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len)); 2946 2947 while (config_len > vdev->config_len) { 2948 qemu_get_byte(f); 2949 config_len--; 2950 } 2951 2952 num = qemu_get_be32(f); 2953 2954 if (num > VIRTIO_QUEUE_MAX) { 2955 error_report("Invalid number of virtqueues: 0x%x", num); 2956 return -1; 2957 } 2958 2959 for (i = 0; i < num; i++) { 2960 vdev->vq[i].vring.num = qemu_get_be32(f); 2961 if (k->has_variable_vring_alignment) { 2962 vdev->vq[i].vring.align = qemu_get_be32(f); 2963 } 2964 vdev->vq[i].vring.desc = qemu_get_be64(f); 2965 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); 2966 vdev->vq[i].signalled_used_valid = false; 2967 vdev->vq[i].notification = true; 2968 2969 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) { 2970 error_report("VQ %d address 0x0 " 2971 "inconsistent with Host index 0x%x", 2972 i, vdev->vq[i].last_avail_idx); 2973 return -1; 2974 } 2975 if (k->load_queue) { 2976 ret = k->load_queue(qbus->parent, i, f); 2977 if (ret) 2978 return ret; 2979 } 2980 } 2981 2982 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 2983 2984 if (vdc->load != NULL) { 2985 ret = vdc->load(vdev, f, version_id); 2986 if (ret) { 2987 return ret; 2988 } 2989 } 2990 2991 if (vdc->vmsd) { 2992 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); 2993 if (ret) { 2994 return ret; 2995 } 2996 } 2997 2998 /* Subsections */ 2999 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); 3000 if (ret) { 3001 return ret; 3002 } 3003 3004 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) { 3005 vdev->device_endian = virtio_default_endian(); 3006 } 3007 3008 if (virtio_64bit_features_needed(vdev)) { 3009 /* 3010 * Subsection load filled vdev->guest_features. Run them 3011 * through virtio_set_features to sanity-check them against 3012 * host_features. 3013 */ 3014 uint64_t features64 = vdev->guest_features; 3015 if (virtio_set_features_nocheck(vdev, features64) < 0) { 3016 error_report("Features 0x%" PRIx64 " unsupported. " 3017 "Allowed features: 0x%" PRIx64, 3018 features64, vdev->host_features); 3019 return -1; 3020 } 3021 } else { 3022 if (virtio_set_features_nocheck(vdev, features) < 0) { 3023 error_report("Features 0x%x unsupported. 
" 3024 "Allowed features: 0x%" PRIx64, 3025 features, vdev->host_features); 3026 return -1; 3027 } 3028 } 3029 3030 if (!virtio_device_started(vdev, vdev->status) && 3031 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3032 vdev->start_on_kick = true; 3033 } 3034 3035 rcu_read_lock(); 3036 for (i = 0; i < num; i++) { 3037 if (vdev->vq[i].vring.desc) { 3038 uint16_t nheads; 3039 3040 /* 3041 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so 3042 * only the region cache needs to be set up. Legacy devices need 3043 * to calculate used and avail ring addresses based on the desc 3044 * address. 3045 */ 3046 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3047 virtio_init_region_cache(vdev, i); 3048 } else { 3049 virtio_queue_update_rings(vdev, i); 3050 } 3051 3052 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3053 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx; 3054 vdev->vq[i].shadow_avail_wrap_counter = 3055 vdev->vq[i].last_avail_wrap_counter; 3056 continue; 3057 } 3058 3059 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; 3060 /* Check it isn't doing strange things with descriptor numbers. */ 3061 if (nheads > vdev->vq[i].vring.num) { 3062 error_report("VQ %d size 0x%x Guest index 0x%x " 3063 "inconsistent with Host index 0x%x: delta 0x%x", 3064 i, vdev->vq[i].vring.num, 3065 vring_avail_idx(&vdev->vq[i]), 3066 vdev->vq[i].last_avail_idx, nheads); 3067 return -1; 3068 } 3069 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); 3070 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); 3071 3072 /* 3073 * Some devices migrate VirtQueueElements that have been popped 3074 * from the avail ring but not yet returned to the used ring. 3075 * Since max ring size < UINT16_MAX it's safe to use modulo 3076 * UINT16_MAX + 1 subtraction. 
3077 */ 3078 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx - 3079 vdev->vq[i].used_idx); 3080 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) { 3081 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - " 3082 "used_idx 0x%x", 3083 i, vdev->vq[i].vring.num, 3084 vdev->vq[i].last_avail_idx, 3085 vdev->vq[i].used_idx); 3086 return -1; 3087 } 3088 } 3089 } 3090 rcu_read_unlock(); 3091 3092 return 0; 3093 } 3094 3095 void virtio_cleanup(VirtIODevice *vdev) 3096 { 3097 qemu_del_vm_change_state_handler(vdev->vmstate); 3098 } 3099 3100 static void virtio_vmstate_change(void *opaque, int running, RunState state) 3101 { 3102 VirtIODevice *vdev = opaque; 3103 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3104 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3105 bool backend_run = running && virtio_device_started(vdev, vdev->status); 3106 vdev->vm_running = running; 3107 3108 if (backend_run) { 3109 virtio_set_status(vdev, vdev->status); 3110 } 3111 3112 if (k->vmstate_change) { 3113 k->vmstate_change(qbus->parent, backend_run); 3114 } 3115 3116 if (!backend_run) { 3117 virtio_set_status(vdev, vdev->status); 3118 } 3119 } 3120 3121 void virtio_instance_init_common(Object *proxy_obj, void *data, 3122 size_t vdev_size, const char *vdev_name) 3123 { 3124 DeviceState *vdev = data; 3125 3126 object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size, 3127 vdev_name, &error_abort, NULL); 3128 qdev_alias_all_properties(vdev, proxy_obj); 3129 } 3130 3131 void virtio_init(VirtIODevice *vdev, const char *name, 3132 uint16_t device_id, size_t config_size) 3133 { 3134 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3135 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3136 int i; 3137 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0; 3138 3139 if (nvectors) { 3140 vdev->vector_queues = 3141 g_malloc0(sizeof(*vdev->vector_queues) * nvectors); 3142 } 3143 3144 vdev->start_on_kick = false; 3145 vdev->started = false; 3146 vdev->device_id = device_id; 3147 vdev->status = 0; 3148 atomic_set(&vdev->isr, 0); 3149 vdev->queue_sel = 0; 3150 vdev->config_vector = VIRTIO_NO_VECTOR; 3151 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); 3152 vdev->vm_running = runstate_is_running(); 3153 vdev->broken = false; 3154 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3155 vdev->vq[i].vector = VIRTIO_NO_VECTOR; 3156 vdev->vq[i].vdev = vdev; 3157 vdev->vq[i].queue_index = i; 3158 } 3159 3160 vdev->name = name; 3161 vdev->config_len = config_size; 3162 if (vdev->config_len) { 3163 vdev->config = g_malloc0(config_size); 3164 } else { 3165 vdev->config = NULL; 3166 } 3167 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), 3168 virtio_vmstate_change, vdev); 3169 vdev->device_endian = virtio_default_endian(); 3170 vdev->use_guest_notifier_mask = true; 3171 } 3172 3173 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) 3174 { 3175 return vdev->vq[n].vring.desc; 3176 } 3177 3178 bool virtio_queue_enabled(VirtIODevice *vdev, int n) 3179 { 3180 return virtio_queue_get_desc_addr(vdev, n) != 0; 3181 } 3182 3183 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) 3184 { 3185 return vdev->vq[n].vring.avail; 3186 } 3187 3188 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) 3189 { 3190 return vdev->vq[n].vring.used; 3191 } 3192 3193 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) 3194 { 3195 return sizeof(VRingDesc) * vdev->vq[n].vring.num; 3196 } 3197 3198 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) 3199 { 3200 int s; 
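/*
 * For packed rings the driver ("avail") area holds only the event
 * suppression structure, so its size does not depend on the ring size.
 */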
3201 3202 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3203 return sizeof(struct VRingPackedDescEvent); 3204 } 3205 3206 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 3207 return offsetof(VRingAvail, ring) + 3208 sizeof(uint16_t) * vdev->vq[n].vring.num + s; 3209 } 3210 3211 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) 3212 { 3213 int s; 3214 3215 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3216 return sizeof(struct VRingPackedDescEvent); 3217 } 3218 3219 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 3220 return offsetof(VRingUsed, ring) + 3221 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; 3222 } 3223 /* For packed rings the value returned below encodes both indices: bits 0-14 hold last_avail_idx and bit 15 its wrap counter, while bits 16-30 hold used_idx and bit 31 its wrap counter. */ 3224 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev, 3225 int n) 3226 { 3227 unsigned int avail, used; 3228 3229 avail = vdev->vq[n].last_avail_idx; 3230 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15; 3231 3232 used = vdev->vq[n].used_idx; 3233 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15; 3234 3235 return avail | used << 16; 3236 } 3237 3238 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, 3239 int n) 3240 { 3241 return vdev->vq[n].last_avail_idx; 3242 } 3243 3244 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) 3245 { 3246 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3247 return virtio_queue_packed_get_last_avail_idx(vdev, n); 3248 } else { 3249 return virtio_queue_split_get_last_avail_idx(vdev, n); 3250 } 3251 } 3252 3253 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev, 3254 int n, unsigned int idx) 3255 { 3256 struct VirtQueue *vq = &vdev->vq[n]; 3257 3258 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff; 3259 vq->last_avail_wrap_counter = 3260 vq->shadow_avail_wrap_counter = !!(idx & 0x8000); 3261 idx >>= 16; 3262 vq->used_idx = idx & 0x7fff; 3263 vq->used_wrap_counter = !!(idx & 0x8000); 3264 } 3265 3266 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev, 3267 int n, unsigned int idx) 3268 { 3269 vdev->vq[n].last_avail_idx = idx; 3270 vdev->vq[n].shadow_avail_idx = idx; 3271 } 3272 3273 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, 3274 unsigned int idx) 3275 { 3276 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3277 virtio_queue_packed_set_last_avail_idx(vdev, n, idx); 3278 } else { 3279 virtio_queue_split_set_last_avail_idx(vdev, n, idx); 3280 } 3281 } 3282 3283 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev, 3284 int n) 3285 { 3286 /* We don't have a reference like avail idx in shared memory */ 3287 return; 3288 } 3289 3290 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev, 3291 int n) 3292 { 3293 rcu_read_lock(); 3294 if (vdev->vq[n].vring.desc) { 3295 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]); 3296 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx; 3297 } 3298 rcu_read_unlock(); 3299 } 3300 3301 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) 3302 { 3303 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3304 virtio_queue_packed_restore_last_avail_idx(vdev, n); 3305 } else { 3306 virtio_queue_split_restore_last_avail_idx(vdev, n); 3307 } 3308 } 3309 3310 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n) 3311 { 3312 /* used idx was updated through set_last_avail_idx() */ 3313 return; 3314 } 3315 3316 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev,
int n) 3317 { 3318 rcu_read_lock(); 3319 if (vdev->vq[n].vring.desc) { 3320 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]); 3321 } 3322 rcu_read_unlock(); 3323 } 3324 3325 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) 3326 { 3327 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3328 return virtio_queue_packed_update_used_idx(vdev, n); 3329 } else { 3330 return virtio_split_packed_update_used_idx(vdev, n); 3331 } 3332 } 3333 3334 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) 3335 { 3336 vdev->vq[n].signalled_used_valid = false; 3337 } 3338 3339 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) 3340 { 3341 return vdev->vq + n; 3342 } 3343 3344 uint16_t virtio_get_queue_index(VirtQueue *vq) 3345 { 3346 return vq->queue_index; 3347 } 3348 3349 static void virtio_queue_guest_notifier_read(EventNotifier *n) 3350 { 3351 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); 3352 if (event_notifier_test_and_clear(n)) { 3353 virtio_irq(vq); 3354 } 3355 } 3356 3357 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, 3358 bool with_irqfd) 3359 { 3360 if (assign && !with_irqfd) { 3361 event_notifier_set_handler(&vq->guest_notifier, 3362 virtio_queue_guest_notifier_read); 3363 } else { 3364 event_notifier_set_handler(&vq->guest_notifier, NULL); 3365 } 3366 if (!assign) { 3367 /* Test and clear notifier before closing it, 3368 * in case poll callback didn't have time to run. */ 3369 virtio_queue_guest_notifier_read(&vq->guest_notifier); 3370 } 3371 } 3372 3373 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) 3374 { 3375 return &vq->guest_notifier; 3376 } 3377 3378 static void virtio_queue_host_notifier_aio_read(EventNotifier *n) 3379 { 3380 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3381 if (event_notifier_test_and_clear(n)) { 3382 virtio_queue_notify_aio_vq(vq); 3383 } 3384 } 3385 3386 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n) 3387 { 3388 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3389 3390 virtio_queue_set_notification(vq, 0); 3391 } 3392 3393 static bool virtio_queue_host_notifier_aio_poll(void *opaque) 3394 { 3395 EventNotifier *n = opaque; 3396 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3397 bool progress; 3398 3399 if (!vq->vring.desc || virtio_queue_empty(vq)) { 3400 return false; 3401 } 3402 3403 progress = virtio_queue_notify_aio_vq(vq); 3404 3405 /* In case the handler function re-enabled notifications */ 3406 virtio_queue_set_notification(vq, 0); 3407 return progress; 3408 } 3409 3410 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) 3411 { 3412 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3413 3414 /* Caller polls once more after this to catch requests that race with us */ 3415 virtio_queue_set_notification(vq, 1); 3416 } 3417 3418 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, 3419 VirtIOHandleAIOOutput handle_output) 3420 { 3421 if (handle_output) { 3422 vq->handle_aio_output = handle_output; 3423 aio_set_event_notifier(ctx, &vq->host_notifier, true, 3424 virtio_queue_host_notifier_aio_read, 3425 virtio_queue_host_notifier_aio_poll); 3426 aio_set_event_notifier_poll(ctx, &vq->host_notifier, 3427 virtio_queue_host_notifier_aio_poll_begin, 3428 virtio_queue_host_notifier_aio_poll_end); 3429 } else { 3430 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL); 3431 /* Test and clear notifier after disabling the event handler, 3432 * in case
poll callback didn't have time to run. */ 3433 virtio_queue_host_notifier_aio_read(&vq->host_notifier); 3434 vq->handle_aio_output = NULL; 3435 } 3436 } 3437 3438 void virtio_queue_host_notifier_read(EventNotifier *n) 3439 { 3440 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 3441 if (event_notifier_test_and_clear(n)) { 3442 virtio_queue_notify_vq(vq); 3443 } 3444 } 3445 3446 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) 3447 { 3448 return &vq->host_notifier; 3449 } 3450 3451 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n, 3452 MemoryRegion *mr, bool assign) 3453 { 3454 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3455 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 3456 3457 if (k->set_host_notifier_mr) { 3458 return k->set_host_notifier_mr(qbus->parent, n, mr, assign); 3459 } 3460 3461 return -1; 3462 } 3463 3464 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) 3465 { 3466 g_free(vdev->bus_name); 3467 vdev->bus_name = g_strdup(bus_name); 3468 } 3469 3470 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...) 3471 { 3472 va_list ap; 3473 3474 va_start(ap, fmt); 3475 error_vreport(fmt, ap); 3476 va_end(ap); 3477 3478 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 3479 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET; 3480 virtio_notify_config(vdev); 3481 } 3482 3483 vdev->broken = true; 3484 } 3485 3486 static void virtio_memory_listener_commit(MemoryListener *listener) 3487 { 3488 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener); 3489 int i; 3490 3491 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3492 if (vdev->vq[i].vring.num == 0) { 3493 break; 3494 } 3495 virtio_init_region_cache(vdev, i); 3496 } 3497 } 3498 3499 static void virtio_device_realize(DeviceState *dev, Error **errp) 3500 { 3501 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3502 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3503 Error *err = NULL; 3504 3505 /* Devices should either use vmsd or the load/save methods */ 3506 assert(!vdc->vmsd || !vdc->load); 3507 3508 if (vdc->realize != NULL) { 3509 vdc->realize(dev, &err); 3510 if (err != NULL) { 3511 error_propagate(errp, err); 3512 return; 3513 } 3514 } 3515 3516 virtio_bus_device_plugged(vdev, &err); 3517 if (err != NULL) { 3518 error_propagate(errp, err); 3519 vdc->unrealize(dev, NULL); 3520 return; 3521 } 3522 3523 vdev->listener.commit = virtio_memory_listener_commit; 3524 memory_listener_register(&vdev->listener, vdev->dma_as); 3525 } 3526 3527 static void virtio_device_unrealize(DeviceState *dev, Error **errp) 3528 { 3529 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3530 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3531 Error *err = NULL; 3532 3533 virtio_bus_device_unplugged(vdev); 3534 3535 if (vdc->unrealize != NULL) { 3536 vdc->unrealize(dev, &err); 3537 if (err != NULL) { 3538 error_propagate(errp, err); 3539 return; 3540 } 3541 } 3542 3543 g_free(vdev->bus_name); 3544 vdev->bus_name = NULL; 3545 } 3546 3547 static void virtio_device_free_virtqueues(VirtIODevice *vdev) 3548 { 3549 int i; 3550 if (!vdev->vq) { 3551 return; 3552 } 3553 3554 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 3555 if (vdev->vq[i].vring.num == 0) { 3556 break; 3557 } 3558 virtio_virtqueue_reset_region_cache(&vdev->vq[i]); 3559 } 3560 g_free(vdev->vq); 3561 } 3562 3563 static void virtio_device_instance_finalize(Object *obj) 3564 { 3565 VirtIODevice *vdev = VIRTIO_DEVICE(obj); 3566 3567 memory_listener_unregister(&vdev->listener); 3568 
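/* The listener is unregistered first so that its commit callback can no
 * longer touch the virtqueues and region caches freed below. */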
virtio_device_free_virtqueues(vdev); 3569 3570 g_free(vdev->config); 3571 g_free(vdev->vector_queues); 3572 } 3573 3574 static Property virtio_properties[] = { 3575 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), 3576 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), 3577 DEFINE_PROP_END_OF_LIST(), 3578 }; 3579 3580 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) 3581 { 3582 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); 3583 int i, n, r, err; 3584 3585 memory_region_transaction_begin(); 3586 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3587 VirtQueue *vq = &vdev->vq[n]; 3588 if (!virtio_queue_get_num(vdev, n)) { 3589 continue; 3590 } 3591 r = virtio_bus_set_host_notifier(qbus, n, true); 3592 if (r < 0) { 3593 err = r; 3594 goto assign_error; 3595 } 3596 event_notifier_set_handler(&vq->host_notifier, 3597 virtio_queue_host_notifier_read); 3598 } 3599 3600 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3601 /* Kick right away to begin processing requests already in vring */ 3602 VirtQueue *vq = &vdev->vq[n]; 3603 if (!vq->vring.num) { 3604 continue; 3605 } 3606 event_notifier_set(&vq->host_notifier); 3607 } 3608 memory_region_transaction_commit(); 3609 return 0; 3610 3611 assign_error: 3612 i = n; /* save n for a second iteration after transaction is committed. */ 3613 while (--n >= 0) { 3614 VirtQueue *vq = &vdev->vq[n]; 3615 if (!virtio_queue_get_num(vdev, n)) { 3616 continue; 3617 } 3618 3619 event_notifier_set_handler(&vq->host_notifier, NULL); 3620 r = virtio_bus_set_host_notifier(qbus, n, false); 3621 assert(r >= 0); 3622 } 3623 memory_region_transaction_commit(); 3624 3625 while (--i >= 0) { 3626 if (!virtio_queue_get_num(vdev, i)) { 3627 continue; 3628 } 3629 virtio_bus_cleanup_host_notifier(qbus, i); 3630 } 3631 return err; 3632 } 3633 3634 int virtio_device_start_ioeventfd(VirtIODevice *vdev) 3635 { 3636 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3637 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3638 3639 return virtio_bus_start_ioeventfd(vbus); 3640 } 3641 3642 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) 3643 { 3644 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); 3645 int n, r; 3646 3647 memory_region_transaction_begin(); 3648 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3649 VirtQueue *vq = &vdev->vq[n]; 3650 3651 if (!virtio_queue_get_num(vdev, n)) { 3652 continue; 3653 } 3654 event_notifier_set_handler(&vq->host_notifier, NULL); 3655 r = virtio_bus_set_host_notifier(qbus, n, false); 3656 assert(r >= 0); 3657 } 3658 memory_region_transaction_commit(); 3659 3660 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { 3661 if (!virtio_queue_get_num(vdev, n)) { 3662 continue; 3663 } 3664 virtio_bus_cleanup_host_notifier(qbus, n); 3665 } 3666 } 3667 3668 int virtio_device_grab_ioeventfd(VirtIODevice *vdev) 3669 { 3670 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3671 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3672 3673 return virtio_bus_grab_ioeventfd(vbus); 3674 } 3675 3676 void virtio_device_release_ioeventfd(VirtIODevice *vdev) 3677 { 3678 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3679 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3680 3681 virtio_bus_release_ioeventfd(vbus); 3682 } 3683 3684 static void virtio_device_class_init(ObjectClass *klass, void *data) 3685 { 3686 /* Set the default value here. 
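 * Concrete virtio devices inherit the realize/unrealize hooks, the
 * ioeventfd start/stop implementations and the legacy feature bits set
 * below unless they override them in their own class_init.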
*/ 3687 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3688 DeviceClass *dc = DEVICE_CLASS(klass); 3689 3690 dc->realize = virtio_device_realize; 3691 dc->unrealize = virtio_device_unrealize; 3692 dc->bus_type = TYPE_VIRTIO_BUS; 3693 dc->props = virtio_properties; 3694 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl; 3695 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; 3696 3697 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; 3698 } 3699 3700 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) 3701 { 3702 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 3703 VirtioBusState *vbus = VIRTIO_BUS(qbus); 3704 3705 return virtio_bus_ioeventfd_enabled(vbus); 3706 } 3707 3708 static const TypeInfo virtio_device_info = { 3709 .name = TYPE_VIRTIO_DEVICE, 3710 .parent = TYPE_DEVICE, 3711 .instance_size = sizeof(VirtIODevice), 3712 .class_init = virtio_device_class_init, 3713 .instance_finalize = virtio_device_instance_finalize, 3714 .abstract = true, 3715 .class_size = sizeof(VirtioDeviceClass), 3716 }; 3717 3718 static void virtio_register_types(void) 3719 { 3720 type_register_static(&virtio_device_info); 3721 } 3722 3723 type_init(virtio_register_types) 3724