/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Nested host->guest notification disabled counter */
    unsigned int notification_disabled;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t
vring_avail_ring(VirtQueue *vq, int i) 148 { 149 hwaddr pa; 150 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); 151 return virtio_lduw_phys(vq->vdev, pa); 152 } 153 154 static inline uint16_t vring_get_used_event(VirtQueue *vq) 155 { 156 return vring_avail_ring(vq, vq->vring.num); 157 } 158 159 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, 160 int i) 161 { 162 hwaddr pa; 163 virtio_tswap32s(vq->vdev, &uelem->id); 164 virtio_tswap32s(vq->vdev, &uelem->len); 165 pa = vq->vring.used + offsetof(VRingUsed, ring[i]); 166 address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED, 167 (void *)uelem, sizeof(VRingUsedElem)); 168 } 169 170 static uint16_t vring_used_idx(VirtQueue *vq) 171 { 172 hwaddr pa; 173 pa = vq->vring.used + offsetof(VRingUsed, idx); 174 return virtio_lduw_phys(vq->vdev, pa); 175 } 176 177 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) 178 { 179 hwaddr pa; 180 pa = vq->vring.used + offsetof(VRingUsed, idx); 181 virtio_stw_phys(vq->vdev, pa, val); 182 vq->used_idx = val; 183 } 184 185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) 186 { 187 VirtIODevice *vdev = vq->vdev; 188 hwaddr pa; 189 pa = vq->vring.used + offsetof(VRingUsed, flags); 190 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask); 191 } 192 193 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) 194 { 195 VirtIODevice *vdev = vq->vdev; 196 hwaddr pa; 197 pa = vq->vring.used + offsetof(VRingUsed, flags); 198 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask); 199 } 200 201 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) 202 { 203 hwaddr pa; 204 if (vq->notification_disabled) { 205 return; 206 } 207 pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); 208 virtio_stw_phys(vq->vdev, pa, val); 209 } 210 211 void virtio_queue_set_notification(VirtQueue *vq, int enable) 212 { 213 if (enable) { 214 assert(vq->notification_disabled > 0); 215 vq->notification_disabled--; 216 } else { 217 vq->notification_disabled++; 218 } 219 220 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 221 vring_set_avail_event(vq, vring_avail_idx(vq)); 222 } else if (enable) { 223 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); 224 } else { 225 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); 226 } 227 if (enable) { 228 /* Expose avail event/used flags before caller checks the avail idx. */ 229 smp_mb(); 230 } 231 } 232 233 int virtio_queue_ready(VirtQueue *vq) 234 { 235 return vq->vring.avail != 0; 236 } 237 238 /* Fetch avail_idx from VQ memory only when we really need to know if 239 * guest has added some buffers. 
*/ 240 int virtio_queue_empty(VirtQueue *vq) 241 { 242 if (vq->shadow_avail_idx != vq->last_avail_idx) { 243 return 0; 244 } 245 246 return vring_avail_idx(vq) == vq->last_avail_idx; 247 } 248 249 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 250 unsigned int len) 251 { 252 unsigned int offset; 253 int i; 254 255 offset = 0; 256 for (i = 0; i < elem->in_num; i++) { 257 size_t size = MIN(len - offset, elem->in_sg[i].iov_len); 258 259 cpu_physical_memory_unmap(elem->in_sg[i].iov_base, 260 elem->in_sg[i].iov_len, 261 1, size); 262 263 offset += size; 264 } 265 266 for (i = 0; i < elem->out_num; i++) 267 cpu_physical_memory_unmap(elem->out_sg[i].iov_base, 268 elem->out_sg[i].iov_len, 269 0, elem->out_sg[i].iov_len); 270 } 271 272 /* virtqueue_detach_element: 273 * @vq: The #VirtQueue 274 * @elem: The #VirtQueueElement 275 * @len: number of bytes written 276 * 277 * Detach the element from the virtqueue. This function is suitable for device 278 * reset or other situations where a #VirtQueueElement is simply freed and will 279 * not be pushed or discarded. 280 */ 281 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, 282 unsigned int len) 283 { 284 vq->inuse--; 285 virtqueue_unmap_sg(vq, elem, len); 286 } 287 288 /* virtqueue_unpop: 289 * @vq: The #VirtQueue 290 * @elem: The #VirtQueueElement 291 * @len: number of bytes written 292 * 293 * Pretend the most recent element wasn't popped from the virtqueue. The next 294 * call to virtqueue_pop() will refetch the element. 295 */ 296 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, 297 unsigned int len) 298 { 299 vq->last_avail_idx--; 300 virtqueue_detach_element(vq, elem, len); 301 } 302 303 /* virtqueue_rewind: 304 * @vq: The #VirtQueue 305 * @num: Number of elements to push back 306 * 307 * Pretend that elements weren't popped from the virtqueue. The next 308 * virtqueue_pop() will refetch the oldest element. 309 * 310 * Use virtqueue_unpop() instead if you have a VirtQueueElement. 311 * 312 * Returns: true on success, false if @num is greater than the number of in use 313 * elements. 314 */ 315 bool virtqueue_rewind(VirtQueue *vq, unsigned int num) 316 { 317 if (num > vq->inuse) { 318 return false; 319 } 320 vq->last_avail_idx -= num; 321 vq->inuse -= num; 322 return true; 323 } 324 325 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, 326 unsigned int len, unsigned int idx) 327 { 328 VRingUsedElem uelem; 329 330 trace_virtqueue_fill(vq, elem, len, idx); 331 332 virtqueue_unmap_sg(vq, elem, len); 333 334 if (unlikely(vq->vdev->broken)) { 335 return; 336 } 337 338 idx = (idx + vq->used_idx) % vq->vring.num; 339 340 uelem.id = elem->index; 341 uelem.len = len; 342 vring_used_write(vq, &uelem, idx); 343 } 344 345 void virtqueue_flush(VirtQueue *vq, unsigned int count) 346 { 347 uint16_t old, new; 348 349 if (unlikely(vq->vdev->broken)) { 350 vq->inuse -= count; 351 return; 352 } 353 354 /* Make sure buffer is written before we update index. 
*/ 355 smp_wmb(); 356 trace_virtqueue_flush(vq, count); 357 old = vq->used_idx; 358 new = old + count; 359 vring_used_idx_set(vq, new); 360 vq->inuse -= count; 361 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) 362 vq->signalled_used_valid = false; 363 } 364 365 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, 366 unsigned int len) 367 { 368 virtqueue_fill(vq, elem, len, 0); 369 virtqueue_flush(vq, 1); 370 } 371 372 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) 373 { 374 uint16_t num_heads = vring_avail_idx(vq) - idx; 375 376 /* Check it isn't doing very strange things with descriptor numbers. */ 377 if (num_heads > vq->vring.num) { 378 virtio_error(vq->vdev, "Guest moved used index from %u to %u", 379 idx, vq->shadow_avail_idx); 380 return -EINVAL; 381 } 382 /* On success, callers read a descriptor at vq->last_avail_idx. 383 * Make sure descriptor read does not bypass avail index read. */ 384 if (num_heads) { 385 smp_rmb(); 386 } 387 388 return num_heads; 389 } 390 391 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, 392 unsigned int *head) 393 { 394 /* Grab the next descriptor number they're advertising, and increment 395 * the index we've seen. */ 396 *head = vring_avail_ring(vq, idx % vq->vring.num); 397 398 /* If their number is silly, that's a fatal mistake. */ 399 if (*head >= vq->vring.num) { 400 virtio_error(vq->vdev, "Guest says index %u is available", *head); 401 return false; 402 } 403 404 return true; 405 } 406 407 enum { 408 VIRTQUEUE_READ_DESC_ERROR = -1, 409 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ 410 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ 411 }; 412 413 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, 414 hwaddr desc_pa, unsigned int max, 415 unsigned int *next) 416 { 417 /* If this descriptor says it doesn't chain, we're done. */ 418 if (!(desc->flags & VRING_DESC_F_NEXT)) { 419 return VIRTQUEUE_READ_DESC_DONE; 420 } 421 422 /* Check they're not leading us off end of descriptors. */ 423 *next = desc->next; 424 /* Make sure compiler knows to grab that: we don't want it changing! */ 425 smp_wmb(); 426 427 if (*next >= max) { 428 virtio_error(vdev, "Desc next is %u", *next); 429 return VIRTQUEUE_READ_DESC_ERROR; 430 } 431 432 vring_desc_read(vdev, desc, desc_pa, *next); 433 return VIRTQUEUE_READ_DESC_MORE; 434 } 435 436 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, 437 unsigned int *out_bytes, 438 unsigned max_in_bytes, unsigned max_out_bytes) 439 { 440 unsigned int idx; 441 unsigned int total_bufs, in_total, out_total; 442 int rc; 443 444 idx = vq->last_avail_idx; 445 446 total_bufs = in_total = out_total = 0; 447 while ((rc = virtqueue_num_heads(vq, idx)) > 0) { 448 VirtIODevice *vdev = vq->vdev; 449 unsigned int max, num_bufs, indirect = 0; 450 VRingDesc desc; 451 hwaddr desc_pa; 452 unsigned int i; 453 454 max = vq->vring.num; 455 num_bufs = total_bufs; 456 457 if (!virtqueue_get_head(vq, idx++, &i)) { 458 goto err; 459 } 460 461 desc_pa = vq->vring.desc; 462 vring_desc_read(vdev, &desc, desc_pa, i); 463 464 if (desc.flags & VRING_DESC_F_INDIRECT) { 465 if (desc.len % sizeof(VRingDesc)) { 466 virtio_error(vdev, "Invalid size for indirect buffer table"); 467 goto err; 468 } 469 470 /* If we've got too many, that implies a descriptor loop. 
*/ 471 if (num_bufs >= max) { 472 virtio_error(vdev, "Looped descriptor"); 473 goto err; 474 } 475 476 /* loop over the indirect descriptor table */ 477 indirect = 1; 478 max = desc.len / sizeof(VRingDesc); 479 desc_pa = desc.addr; 480 num_bufs = i = 0; 481 vring_desc_read(vdev, &desc, desc_pa, i); 482 } 483 484 do { 485 /* If we've got too many, that implies a descriptor loop. */ 486 if (++num_bufs > max) { 487 virtio_error(vdev, "Looped descriptor"); 488 goto err; 489 } 490 491 if (desc.flags & VRING_DESC_F_WRITE) { 492 in_total += desc.len; 493 } else { 494 out_total += desc.len; 495 } 496 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 497 goto done; 498 } 499 500 rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i); 501 } while (rc == VIRTQUEUE_READ_DESC_MORE); 502 503 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 504 goto err; 505 } 506 507 if (!indirect) 508 total_bufs = num_bufs; 509 else 510 total_bufs++; 511 } 512 513 if (rc < 0) { 514 goto err; 515 } 516 517 done: 518 if (in_bytes) { 519 *in_bytes = in_total; 520 } 521 if (out_bytes) { 522 *out_bytes = out_total; 523 } 524 return; 525 526 err: 527 in_total = out_total = 0; 528 goto done; 529 } 530 531 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, 532 unsigned int out_bytes) 533 { 534 unsigned int in_total, out_total; 535 536 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); 537 return in_bytes <= in_total && out_bytes <= out_total; 538 } 539 540 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, 541 hwaddr *addr, struct iovec *iov, 542 unsigned int max_num_sg, bool is_write, 543 hwaddr pa, size_t sz) 544 { 545 bool ok = false; 546 unsigned num_sg = *p_num_sg; 547 assert(num_sg <= max_num_sg); 548 549 if (!sz) { 550 virtio_error(vdev, "virtio: zero sized buffers are not allowed"); 551 goto out; 552 } 553 554 while (sz) { 555 hwaddr len = sz; 556 557 if (num_sg == max_num_sg) { 558 virtio_error(vdev, "virtio: too many write descriptors in " 559 "indirect table"); 560 goto out; 561 } 562 563 iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write); 564 if (!iov[num_sg].iov_base) { 565 virtio_error(vdev, "virtio: bogus descriptor or out of resources"); 566 goto out; 567 } 568 569 iov[num_sg].iov_len = len; 570 addr[num_sg] = pa; 571 572 sz -= len; 573 pa += len; 574 num_sg++; 575 } 576 ok = true; 577 578 out: 579 *p_num_sg = num_sg; 580 return ok; 581 } 582 583 /* Only used by error code paths before we have a VirtQueueElement (therefore 584 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to 585 * yet. 586 */ 587 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, 588 struct iovec *iov) 589 { 590 unsigned int i; 591 592 for (i = 0; i < out_num + in_num; i++) { 593 int is_write = i >= out_num; 594 595 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0); 596 iov++; 597 } 598 } 599 600 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, 601 unsigned int *num_sg, unsigned int max_size, 602 int is_write) 603 { 604 unsigned int i; 605 hwaddr len; 606 607 /* Note: this function MUST validate input, some callers 608 * are passing in num_sg values received over the network. 609 */ 610 /* TODO: teach all callers that this can fail, and return failure instead 611 * of asserting here. 612 * When we do, we might be able to re-enable NDEBUG below. 
 */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, 0);
}

static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    if (unlikely(vdev->broken)) {
        return NULL;
    }
    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are no input or output descriptors.
*/ 685 out_num = in_num = 0; 686 687 max = vq->vring.num; 688 689 if (vq->inuse >= vq->vring.num) { 690 virtio_error(vdev, "Virtqueue size exceeded"); 691 return NULL; 692 } 693 694 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { 695 return NULL; 696 } 697 698 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 699 vring_set_avail_event(vq, vq->last_avail_idx); 700 } 701 702 i = head; 703 vring_desc_read(vdev, &desc, desc_pa, i); 704 if (desc.flags & VRING_DESC_F_INDIRECT) { 705 if (desc.len % sizeof(VRingDesc)) { 706 virtio_error(vdev, "Invalid size for indirect buffer table"); 707 return NULL; 708 } 709 710 /* loop over the indirect descriptor table */ 711 max = desc.len / sizeof(VRingDesc); 712 desc_pa = desc.addr; 713 i = 0; 714 vring_desc_read(vdev, &desc, desc_pa, i); 715 } 716 717 /* Collect all the descriptors */ 718 do { 719 bool map_ok; 720 721 if (desc.flags & VRING_DESC_F_WRITE) { 722 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, 723 iov + out_num, 724 VIRTQUEUE_MAX_SIZE - out_num, true, 725 desc.addr, desc.len); 726 } else { 727 if (in_num) { 728 virtio_error(vdev, "Incorrect order for descriptors"); 729 goto err_undo_map; 730 } 731 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, 732 VIRTQUEUE_MAX_SIZE, false, 733 desc.addr, desc.len); 734 } 735 if (!map_ok) { 736 goto err_undo_map; 737 } 738 739 /* If we've got too many, that implies a descriptor loop. */ 740 if ((in_num + out_num) > max) { 741 virtio_error(vdev, "Looped descriptor"); 742 goto err_undo_map; 743 } 744 745 rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i); 746 } while (rc == VIRTQUEUE_READ_DESC_MORE); 747 748 if (rc == VIRTQUEUE_READ_DESC_ERROR) { 749 goto err_undo_map; 750 } 751 752 /* Now copy what we have collected and mapped */ 753 elem = virtqueue_alloc_element(sz, out_num, in_num); 754 elem->index = head; 755 for (i = 0; i < out_num; i++) { 756 elem->out_addr[i] = addr[i]; 757 elem->out_sg[i] = iov[i]; 758 } 759 for (i = 0; i < in_num; i++) { 760 elem->in_addr[i] = addr[out_num + i]; 761 elem->in_sg[i] = iov[out_num + i]; 762 } 763 764 vq->inuse++; 765 766 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 767 return elem; 768 769 err_undo_map: 770 virtqueue_undo_map_desc(out_num, in_num, iov); 771 return NULL; 772 } 773 774 /* Reading and writing a structure directly to QEMUFile is *awful*, but 775 * it is what QEMU has always done by mistake. We can change it sooner 776 * or later by bumping the version number of the affected vm states. 777 * In the meanwhile, since the in-memory layout of VirtQueueElement 778 * has changed, we need to marshal to and from the layout that was 779 * used before the change. 
780 */ 781 typedef struct VirtQueueElementOld { 782 unsigned int index; 783 unsigned int out_num; 784 unsigned int in_num; 785 hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; 786 hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; 787 struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; 788 struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; 789 } VirtQueueElementOld; 790 791 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz) 792 { 793 VirtQueueElement *elem; 794 VirtQueueElementOld data; 795 int i; 796 797 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 798 799 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); 800 elem->index = data.index; 801 802 for (i = 0; i < elem->in_num; i++) { 803 elem->in_addr[i] = data.in_addr[i]; 804 } 805 806 for (i = 0; i < elem->out_num; i++) { 807 elem->out_addr[i] = data.out_addr[i]; 808 } 809 810 for (i = 0; i < elem->in_num; i++) { 811 /* Base is overwritten by virtqueue_map. */ 812 elem->in_sg[i].iov_base = 0; 813 elem->in_sg[i].iov_len = data.in_sg[i].iov_len; 814 } 815 816 for (i = 0; i < elem->out_num; i++) { 817 /* Base is overwritten by virtqueue_map. */ 818 elem->out_sg[i].iov_base = 0; 819 elem->out_sg[i].iov_len = data.out_sg[i].iov_len; 820 } 821 822 virtqueue_map(elem); 823 return elem; 824 } 825 826 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) 827 { 828 VirtQueueElementOld data; 829 int i; 830 831 memset(&data, 0, sizeof(data)); 832 data.index = elem->index; 833 data.in_num = elem->in_num; 834 data.out_num = elem->out_num; 835 836 for (i = 0; i < elem->in_num; i++) { 837 data.in_addr[i] = elem->in_addr[i]; 838 } 839 840 for (i = 0; i < elem->out_num; i++) { 841 data.out_addr[i] = elem->out_addr[i]; 842 } 843 844 for (i = 0; i < elem->in_num; i++) { 845 /* Base is overwritten by virtqueue_map when loading. Do not 846 * save it, as it would leak the QEMU address space layout. */ 847 data.in_sg[i].iov_len = elem->in_sg[i].iov_len; 848 } 849 850 for (i = 0; i < elem->out_num; i++) { 851 /* Do not save iov_base as above. 
*/ 852 data.out_sg[i].iov_len = elem->out_sg[i].iov_len; 853 } 854 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 855 } 856 857 /* virtio device */ 858 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) 859 { 860 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 861 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 862 863 if (unlikely(vdev->broken)) { 864 return; 865 } 866 867 if (k->notify) { 868 k->notify(qbus->parent, vector); 869 } 870 } 871 872 void virtio_update_irq(VirtIODevice *vdev) 873 { 874 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 875 } 876 877 static int virtio_validate_features(VirtIODevice *vdev) 878 { 879 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 880 881 if (k->validate_features) { 882 return k->validate_features(vdev); 883 } else { 884 return 0; 885 } 886 } 887 888 int virtio_set_status(VirtIODevice *vdev, uint8_t val) 889 { 890 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 891 trace_virtio_set_status(vdev, val); 892 893 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 894 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && 895 val & VIRTIO_CONFIG_S_FEATURES_OK) { 896 int ret = virtio_validate_features(vdev); 897 898 if (ret) { 899 return ret; 900 } 901 } 902 } 903 if (k->set_status) { 904 k->set_status(vdev, val); 905 } 906 vdev->status = val; 907 return 0; 908 } 909 910 bool target_words_bigendian(void); 911 static enum virtio_device_endian virtio_default_endian(void) 912 { 913 if (target_words_bigendian()) { 914 return VIRTIO_DEVICE_ENDIAN_BIG; 915 } else { 916 return VIRTIO_DEVICE_ENDIAN_LITTLE; 917 } 918 } 919 920 static enum virtio_device_endian virtio_current_cpu_endian(void) 921 { 922 CPUClass *cc = CPU_GET_CLASS(current_cpu); 923 924 if (cc->virtio_is_big_endian(current_cpu)) { 925 return VIRTIO_DEVICE_ENDIAN_BIG; 926 } else { 927 return VIRTIO_DEVICE_ENDIAN_LITTLE; 928 } 929 } 930 931 void virtio_reset(void *opaque) 932 { 933 VirtIODevice *vdev = opaque; 934 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 935 int i; 936 937 virtio_set_status(vdev, 0); 938 if (current_cpu) { 939 /* Guest initiated reset */ 940 vdev->device_endian = virtio_current_cpu_endian(); 941 } else { 942 /* System reset */ 943 vdev->device_endian = virtio_default_endian(); 944 } 945 946 if (k->reset) { 947 k->reset(vdev); 948 } 949 950 vdev->broken = false; 951 vdev->guest_features = 0; 952 vdev->queue_sel = 0; 953 vdev->status = 0; 954 atomic_set(&vdev->isr, 0); 955 vdev->config_vector = VIRTIO_NO_VECTOR; 956 virtio_notify_vector(vdev, vdev->config_vector); 957 958 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { 959 vdev->vq[i].vring.desc = 0; 960 vdev->vq[i].vring.avail = 0; 961 vdev->vq[i].vring.used = 0; 962 vdev->vq[i].last_avail_idx = 0; 963 vdev->vq[i].shadow_avail_idx = 0; 964 vdev->vq[i].used_idx = 0; 965 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); 966 vdev->vq[i].signalled_used = 0; 967 vdev->vq[i].signalled_used_valid = false; 968 vdev->vq[i].notification_disabled = 0; 969 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; 970 vdev->vq[i].inuse = 0; 971 } 972 } 973 974 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) 975 { 976 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 977 uint8_t val; 978 979 if (addr + sizeof(val) > vdev->config_len) { 980 return (uint32_t)-1; 981 } 982 983 k->get_config(vdev, vdev->config); 984 985 val = ldub_p(vdev->config + addr); 986 return val; 987 } 988 989 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) 990 { 991 VirtioDeviceClass 
*k = VIRTIO_DEVICE_GET_CLASS(vdev); 992 uint16_t val; 993 994 if (addr + sizeof(val) > vdev->config_len) { 995 return (uint32_t)-1; 996 } 997 998 k->get_config(vdev, vdev->config); 999 1000 val = lduw_p(vdev->config + addr); 1001 return val; 1002 } 1003 1004 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) 1005 { 1006 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1007 uint32_t val; 1008 1009 if (addr + sizeof(val) > vdev->config_len) { 1010 return (uint32_t)-1; 1011 } 1012 1013 k->get_config(vdev, vdev->config); 1014 1015 val = ldl_p(vdev->config + addr); 1016 return val; 1017 } 1018 1019 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) 1020 { 1021 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1022 uint8_t val = data; 1023 1024 if (addr + sizeof(val) > vdev->config_len) { 1025 return; 1026 } 1027 1028 stb_p(vdev->config + addr, val); 1029 1030 if (k->set_config) { 1031 k->set_config(vdev, vdev->config); 1032 } 1033 } 1034 1035 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) 1036 { 1037 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1038 uint16_t val = data; 1039 1040 if (addr + sizeof(val) > vdev->config_len) { 1041 return; 1042 } 1043 1044 stw_p(vdev->config + addr, val); 1045 1046 if (k->set_config) { 1047 k->set_config(vdev, vdev->config); 1048 } 1049 } 1050 1051 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) 1052 { 1053 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1054 uint32_t val = data; 1055 1056 if (addr + sizeof(val) > vdev->config_len) { 1057 return; 1058 } 1059 1060 stl_p(vdev->config + addr, val); 1061 1062 if (k->set_config) { 1063 k->set_config(vdev, vdev->config); 1064 } 1065 } 1066 1067 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr) 1068 { 1069 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1070 uint8_t val; 1071 1072 if (addr + sizeof(val) > vdev->config_len) { 1073 return (uint32_t)-1; 1074 } 1075 1076 k->get_config(vdev, vdev->config); 1077 1078 val = ldub_p(vdev->config + addr); 1079 return val; 1080 } 1081 1082 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr) 1083 { 1084 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1085 uint16_t val; 1086 1087 if (addr + sizeof(val) > vdev->config_len) { 1088 return (uint32_t)-1; 1089 } 1090 1091 k->get_config(vdev, vdev->config); 1092 1093 val = lduw_le_p(vdev->config + addr); 1094 return val; 1095 } 1096 1097 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr) 1098 { 1099 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1100 uint32_t val; 1101 1102 if (addr + sizeof(val) > vdev->config_len) { 1103 return (uint32_t)-1; 1104 } 1105 1106 k->get_config(vdev, vdev->config); 1107 1108 val = ldl_le_p(vdev->config + addr); 1109 return val; 1110 } 1111 1112 void virtio_config_modern_writeb(VirtIODevice *vdev, 1113 uint32_t addr, uint32_t data) 1114 { 1115 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1116 uint8_t val = data; 1117 1118 if (addr + sizeof(val) > vdev->config_len) { 1119 return; 1120 } 1121 1122 stb_p(vdev->config + addr, val); 1123 1124 if (k->set_config) { 1125 k->set_config(vdev, vdev->config); 1126 } 1127 } 1128 1129 void virtio_config_modern_writew(VirtIODevice *vdev, 1130 uint32_t addr, uint32_t data) 1131 { 1132 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1133 uint16_t val = data; 1134 1135 if (addr + sizeof(val) > vdev->config_len) { 1136 return; 1137 } 1138 1139 stw_le_p(vdev->config + 
addr, val); 1140 1141 if (k->set_config) { 1142 k->set_config(vdev, vdev->config); 1143 } 1144 } 1145 1146 void virtio_config_modern_writel(VirtIODevice *vdev, 1147 uint32_t addr, uint32_t data) 1148 { 1149 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1150 uint32_t val = data; 1151 1152 if (addr + sizeof(val) > vdev->config_len) { 1153 return; 1154 } 1155 1156 stl_le_p(vdev->config + addr, val); 1157 1158 if (k->set_config) { 1159 k->set_config(vdev, vdev->config); 1160 } 1161 } 1162 1163 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) 1164 { 1165 vdev->vq[n].vring.desc = addr; 1166 virtio_queue_update_rings(vdev, n); 1167 } 1168 1169 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) 1170 { 1171 return vdev->vq[n].vring.desc; 1172 } 1173 1174 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, 1175 hwaddr avail, hwaddr used) 1176 { 1177 vdev->vq[n].vring.desc = desc; 1178 vdev->vq[n].vring.avail = avail; 1179 vdev->vq[n].vring.used = used; 1180 } 1181 1182 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) 1183 { 1184 /* Don't allow guest to flip queue between existent and 1185 * nonexistent states, or to set it to an invalid size. 1186 */ 1187 if (!!num != !!vdev->vq[n].vring.num || 1188 num > VIRTQUEUE_MAX_SIZE || 1189 num < 0) { 1190 return; 1191 } 1192 vdev->vq[n].vring.num = num; 1193 } 1194 1195 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector) 1196 { 1197 return QLIST_FIRST(&vdev->vector_queues[vector]); 1198 } 1199 1200 VirtQueue *virtio_vector_next_queue(VirtQueue *vq) 1201 { 1202 return QLIST_NEXT(vq, node); 1203 } 1204 1205 int virtio_queue_get_num(VirtIODevice *vdev, int n) 1206 { 1207 return vdev->vq[n].vring.num; 1208 } 1209 1210 int virtio_get_num_queues(VirtIODevice *vdev) 1211 { 1212 int i; 1213 1214 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1215 if (!virtio_queue_get_num(vdev, i)) { 1216 break; 1217 } 1218 } 1219 1220 return i; 1221 } 1222 1223 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) 1224 { 1225 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1226 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1227 1228 /* virtio-1 compliant devices cannot change the alignment */ 1229 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1230 error_report("tried to modify queue alignment for virtio-1 device"); 1231 return; 1232 } 1233 /* Check that the transport told us it was going to do this 1234 * (so a buggy transport will immediately assert rather than 1235 * silently failing to migrate this state) 1236 */ 1237 assert(k->has_variable_vring_alignment); 1238 1239 vdev->vq[n].vring.align = align; 1240 virtio_queue_update_rings(vdev, n); 1241 } 1242 1243 static void virtio_queue_notify_aio_vq(VirtQueue *vq) 1244 { 1245 if (vq->vring.desc && vq->handle_aio_output) { 1246 VirtIODevice *vdev = vq->vdev; 1247 1248 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1249 vq->handle_aio_output(vdev, vq); 1250 } 1251 } 1252 1253 static void virtio_queue_notify_vq(VirtQueue *vq) 1254 { 1255 if (vq->vring.desc && vq->handle_output) { 1256 VirtIODevice *vdev = vq->vdev; 1257 1258 if (unlikely(vdev->broken)) { 1259 return; 1260 } 1261 1262 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1263 vq->handle_output(vdev, vq); 1264 } 1265 } 1266 1267 void virtio_queue_notify(VirtIODevice *vdev, int n) 1268 { 1269 virtio_queue_notify_vq(&vdev->vq[n]); 1270 } 1271 1272 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) 1273 { 1274 return n < VIRTIO_QUEUE_MAX ? 
vdev->vq[n].vector : 1275 VIRTIO_NO_VECTOR; 1276 } 1277 1278 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) 1279 { 1280 VirtQueue *vq = &vdev->vq[n]; 1281 1282 if (n < VIRTIO_QUEUE_MAX) { 1283 if (vdev->vector_queues && 1284 vdev->vq[n].vector != VIRTIO_NO_VECTOR) { 1285 QLIST_REMOVE(vq, node); 1286 } 1287 vdev->vq[n].vector = vector; 1288 if (vdev->vector_queues && 1289 vector != VIRTIO_NO_VECTOR) { 1290 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node); 1291 } 1292 } 1293 } 1294 1295 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, 1296 VirtIOHandleOutput handle_output) 1297 { 1298 int i; 1299 1300 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1301 if (vdev->vq[i].vring.num == 0) 1302 break; 1303 } 1304 1305 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) 1306 abort(); 1307 1308 vdev->vq[i].vring.num = queue_size; 1309 vdev->vq[i].vring.num_default = queue_size; 1310 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; 1311 vdev->vq[i].handle_output = handle_output; 1312 vdev->vq[i].handle_aio_output = NULL; 1313 1314 return &vdev->vq[i]; 1315 } 1316 1317 void virtio_del_queue(VirtIODevice *vdev, int n) 1318 { 1319 if (n < 0 || n >= VIRTIO_QUEUE_MAX) { 1320 abort(); 1321 } 1322 1323 vdev->vq[n].vring.num = 0; 1324 vdev->vq[n].vring.num_default = 0; 1325 } 1326 1327 static void virtio_set_isr(VirtIODevice *vdev, int value) 1328 { 1329 uint8_t old = atomic_read(&vdev->isr); 1330 1331 /* Do not write ISR if it does not change, so that its cacheline remains 1332 * shared in the common case where the guest does not read it. 1333 */ 1334 if ((old & value) != value) { 1335 atomic_or(&vdev->isr, value); 1336 } 1337 } 1338 1339 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) 1340 { 1341 uint16_t old, new; 1342 bool v; 1343 /* We need to expose used array entries before checking used event. */ 1344 smp_mb(); 1345 /* Always notify when queue is empty (when feature acknowledge) */ 1346 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && 1347 !vq->inuse && virtio_queue_empty(vq)) { 1348 return true; 1349 } 1350 1351 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 1352 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); 1353 } 1354 1355 v = vq->signalled_used_valid; 1356 vq->signalled_used_valid = true; 1357 old = vq->signalled_used; 1358 new = vq->signalled_used = vq->used_idx; 1359 return !v || vring_need_event(vring_get_used_event(vq), new, old); 1360 } 1361 1362 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) 1363 { 1364 if (!virtio_should_notify(vdev, vq)) { 1365 return; 1366 } 1367 1368 trace_virtio_notify_irqfd(vdev, vq); 1369 1370 /* 1371 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but 1372 * windows drivers included in virtio-win 1.8.0 (circa 2015) are 1373 * incorrectly polling this bit during crashdump and hibernation 1374 * in MSI mode, causing a hang if this bit is never updated. 1375 * Recent releases of Windows do not really shut down, but rather 1376 * log out and hibernate to make the next startup faster. Hence, 1377 * this manifested as a more serious hang during shutdown with 1378 * 1379 * Next driver release from 2016 fixed this problem, so working around it 1380 * is not a must, but it's easy to do so let's do it here. 1381 * 1382 * Note: it's safe to update ISR from any thread as it was switched 1383 * to an atomic operation. 
1384 */ 1385 virtio_set_isr(vq->vdev, 0x1); 1386 event_notifier_set(&vq->guest_notifier); 1387 } 1388 1389 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) 1390 { 1391 if (!virtio_should_notify(vdev, vq)) { 1392 return; 1393 } 1394 1395 trace_virtio_notify(vdev, vq); 1396 virtio_set_isr(vq->vdev, 0x1); 1397 virtio_notify_vector(vdev, vq->vector); 1398 } 1399 1400 void virtio_notify_config(VirtIODevice *vdev) 1401 { 1402 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1403 return; 1404 1405 virtio_set_isr(vdev, 0x3); 1406 vdev->generation++; 1407 virtio_notify_vector(vdev, vdev->config_vector); 1408 } 1409 1410 static bool virtio_device_endian_needed(void *opaque) 1411 { 1412 VirtIODevice *vdev = opaque; 1413 1414 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); 1415 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1416 return vdev->device_endian != virtio_default_endian(); 1417 } 1418 /* Devices conforming to VIRTIO 1.0 or later are always LE. */ 1419 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; 1420 } 1421 1422 static bool virtio_64bit_features_needed(void *opaque) 1423 { 1424 VirtIODevice *vdev = opaque; 1425 1426 return (vdev->host_features >> 32) != 0; 1427 } 1428 1429 static bool virtio_virtqueue_needed(void *opaque) 1430 { 1431 VirtIODevice *vdev = opaque; 1432 1433 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); 1434 } 1435 1436 static bool virtio_ringsize_needed(void *opaque) 1437 { 1438 VirtIODevice *vdev = opaque; 1439 int i; 1440 1441 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1442 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) { 1443 return true; 1444 } 1445 } 1446 return false; 1447 } 1448 1449 static bool virtio_extra_state_needed(void *opaque) 1450 { 1451 VirtIODevice *vdev = opaque; 1452 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1453 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1454 1455 return k->has_extra_state && 1456 k->has_extra_state(qbus->parent); 1457 } 1458 1459 static bool virtio_broken_needed(void *opaque) 1460 { 1461 VirtIODevice *vdev = opaque; 1462 1463 return vdev->broken; 1464 } 1465 1466 static const VMStateDescription vmstate_virtqueue = { 1467 .name = "virtqueue_state", 1468 .version_id = 1, 1469 .minimum_version_id = 1, 1470 .fields = (VMStateField[]) { 1471 VMSTATE_UINT64(vring.avail, struct VirtQueue), 1472 VMSTATE_UINT64(vring.used, struct VirtQueue), 1473 VMSTATE_END_OF_LIST() 1474 } 1475 }; 1476 1477 static const VMStateDescription vmstate_virtio_virtqueues = { 1478 .name = "virtio/virtqueues", 1479 .version_id = 1, 1480 .minimum_version_id = 1, 1481 .needed = &virtio_virtqueue_needed, 1482 .fields = (VMStateField[]) { 1483 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1484 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), 1485 VMSTATE_END_OF_LIST() 1486 } 1487 }; 1488 1489 static const VMStateDescription vmstate_ringsize = { 1490 .name = "ringsize_state", 1491 .version_id = 1, 1492 .minimum_version_id = 1, 1493 .fields = (VMStateField[]) { 1494 VMSTATE_UINT32(vring.num_default, struct VirtQueue), 1495 VMSTATE_END_OF_LIST() 1496 } 1497 }; 1498 1499 static const VMStateDescription vmstate_virtio_ringsize = { 1500 .name = "virtio/ringsize", 1501 .version_id = 1, 1502 .minimum_version_id = 1, 1503 .needed = &virtio_ringsize_needed, 1504 .fields = (VMStateField[]) { 1505 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1506 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), 1507 VMSTATE_END_OF_LIST() 1508 } 1509 }; 1510 1511 static int 
get_extra_state(QEMUFile *f, void *pv, size_t size) 1512 { 1513 VirtIODevice *vdev = pv; 1514 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1515 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1516 1517 if (!k->load_extra_state) { 1518 return -1; 1519 } else { 1520 return k->load_extra_state(qbus->parent, f); 1521 } 1522 } 1523 1524 static void put_extra_state(QEMUFile *f, void *pv, size_t size) 1525 { 1526 VirtIODevice *vdev = pv; 1527 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1528 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1529 1530 k->save_extra_state(qbus->parent, f); 1531 } 1532 1533 static const VMStateInfo vmstate_info_extra_state = { 1534 .name = "virtqueue_extra_state", 1535 .get = get_extra_state, 1536 .put = put_extra_state, 1537 }; 1538 1539 static const VMStateDescription vmstate_virtio_extra_state = { 1540 .name = "virtio/extra_state", 1541 .version_id = 1, 1542 .minimum_version_id = 1, 1543 .needed = &virtio_extra_state_needed, 1544 .fields = (VMStateField[]) { 1545 { 1546 .name = "extra_state", 1547 .version_id = 0, 1548 .field_exists = NULL, 1549 .size = 0, 1550 .info = &vmstate_info_extra_state, 1551 .flags = VMS_SINGLE, 1552 .offset = 0, 1553 }, 1554 VMSTATE_END_OF_LIST() 1555 } 1556 }; 1557 1558 static const VMStateDescription vmstate_virtio_device_endian = { 1559 .name = "virtio/device_endian", 1560 .version_id = 1, 1561 .minimum_version_id = 1, 1562 .needed = &virtio_device_endian_needed, 1563 .fields = (VMStateField[]) { 1564 VMSTATE_UINT8(device_endian, VirtIODevice), 1565 VMSTATE_END_OF_LIST() 1566 } 1567 }; 1568 1569 static const VMStateDescription vmstate_virtio_64bit_features = { 1570 .name = "virtio/64bit_features", 1571 .version_id = 1, 1572 .minimum_version_id = 1, 1573 .needed = &virtio_64bit_features_needed, 1574 .fields = (VMStateField[]) { 1575 VMSTATE_UINT64(guest_features, VirtIODevice), 1576 VMSTATE_END_OF_LIST() 1577 } 1578 }; 1579 1580 static const VMStateDescription vmstate_virtio_broken = { 1581 .name = "virtio/broken", 1582 .version_id = 1, 1583 .minimum_version_id = 1, 1584 .needed = &virtio_broken_needed, 1585 .fields = (VMStateField[]) { 1586 VMSTATE_BOOL(broken, VirtIODevice), 1587 VMSTATE_END_OF_LIST() 1588 } 1589 }; 1590 1591 static const VMStateDescription vmstate_virtio = { 1592 .name = "virtio", 1593 .version_id = 1, 1594 .minimum_version_id = 1, 1595 .minimum_version_id_old = 1, 1596 .fields = (VMStateField[]) { 1597 VMSTATE_END_OF_LIST() 1598 }, 1599 .subsections = (const VMStateDescription*[]) { 1600 &vmstate_virtio_device_endian, 1601 &vmstate_virtio_64bit_features, 1602 &vmstate_virtio_virtqueues, 1603 &vmstate_virtio_ringsize, 1604 &vmstate_virtio_broken, 1605 &vmstate_virtio_extra_state, 1606 NULL 1607 } 1608 }; 1609 1610 void virtio_save(VirtIODevice *vdev, QEMUFile *f) 1611 { 1612 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1613 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1614 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1615 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); 1616 int i; 1617 1618 if (k->save_config) { 1619 k->save_config(qbus->parent, f); 1620 } 1621 1622 qemu_put_8s(f, &vdev->status); 1623 qemu_put_8s(f, &vdev->isr); 1624 qemu_put_be16s(f, &vdev->queue_sel); 1625 qemu_put_be32s(f, &guest_features_lo); 1626 qemu_put_be32(f, vdev->config_len); 1627 qemu_put_buffer(f, vdev->config, vdev->config_len); 1628 1629 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1630 if (vdev->vq[i].vring.num == 0) 1631 break; 1632 } 1633 1634 qemu_put_be32(f, i); 1635 1636 for (i = 
0; i < VIRTIO_QUEUE_MAX; i++) { 1637 if (vdev->vq[i].vring.num == 0) 1638 break; 1639 1640 qemu_put_be32(f, vdev->vq[i].vring.num); 1641 if (k->has_variable_vring_alignment) { 1642 qemu_put_be32(f, vdev->vq[i].vring.align); 1643 } 1644 /* XXX virtio-1 devices */ 1645 qemu_put_be64(f, vdev->vq[i].vring.desc); 1646 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); 1647 if (k->save_queue) { 1648 k->save_queue(qbus->parent, i, f); 1649 } 1650 } 1651 1652 if (vdc->save != NULL) { 1653 vdc->save(vdev, f); 1654 } 1655 1656 if (vdc->vmsd) { 1657 vmstate_save_state(f, vdc->vmsd, vdev, NULL); 1658 } 1659 1660 /* Subsections */ 1661 vmstate_save_state(f, &vmstate_virtio, vdev, NULL); 1662 } 1663 1664 /* A wrapper for use as a VMState .put function */ 1665 static void virtio_device_put(QEMUFile *f, void *opaque, size_t size) 1666 { 1667 virtio_save(VIRTIO_DEVICE(opaque), f); 1668 } 1669 1670 /* A wrapper for use as a VMState .get function */ 1671 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size) 1672 { 1673 VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 1674 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); 1675 1676 return virtio_load(vdev, f, dc->vmsd->version_id); 1677 } 1678 1679 const VMStateInfo virtio_vmstate_info = { 1680 .name = "virtio", 1681 .get = virtio_device_get, 1682 .put = virtio_device_put, 1683 }; 1684 1685 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) 1686 { 1687 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1688 bool bad = (val & ~(vdev->host_features)) != 0; 1689 1690 val &= vdev->host_features; 1691 if (k->set_features) { 1692 k->set_features(vdev, val); 1693 } 1694 vdev->guest_features = val; 1695 return bad ? -1 : 0; 1696 } 1697 1698 int virtio_set_features(VirtIODevice *vdev, uint64_t val) 1699 { 1700 /* 1701 * The driver must not attempt to set features after feature negotiation 1702 * has finished. 1703 */ 1704 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { 1705 return -EINVAL; 1706 } 1707 return virtio_set_features_nocheck(vdev, val); 1708 } 1709 1710 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) 1711 { 1712 int i, ret; 1713 int32_t config_len; 1714 uint32_t num; 1715 uint32_t features; 1716 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1717 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1718 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1719 1720 /* 1721 * We poison the endianness to ensure it does not get used before 1722 * subsections have been loaded. 1723 */ 1724 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN; 1725 1726 if (k->load_config) { 1727 ret = k->load_config(qbus->parent, f); 1728 if (ret) 1729 return ret; 1730 } 1731 1732 qemu_get_8s(f, &vdev->status); 1733 qemu_get_8s(f, &vdev->isr); 1734 qemu_get_be16s(f, &vdev->queue_sel); 1735 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) { 1736 return -1; 1737 } 1738 qemu_get_be32s(f, &features); 1739 1740 /* 1741 * Temporarily set guest_features low bits - needed by 1742 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 1743 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ. 1744 * 1745 * Note: devices should always test host features in future - don't create 1746 * new dependencies like this. 1747 */ 1748 vdev->guest_features = features; 1749 1750 config_len = qemu_get_be32(f); 1751 1752 /* 1753 * There are cases where the incoming config can be bigger or smaller 1754 * than what we have; so load what we have space for, and skip 1755 * any excess that's in the stream. 
1756 */ 1757 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len)); 1758 1759 while (config_len > vdev->config_len) { 1760 qemu_get_byte(f); 1761 config_len--; 1762 } 1763 1764 num = qemu_get_be32(f); 1765 1766 if (num > VIRTIO_QUEUE_MAX) { 1767 error_report("Invalid number of virtqueues: 0x%x", num); 1768 return -1; 1769 } 1770 1771 for (i = 0; i < num; i++) { 1772 vdev->vq[i].vring.num = qemu_get_be32(f); 1773 if (k->has_variable_vring_alignment) { 1774 vdev->vq[i].vring.align = qemu_get_be32(f); 1775 } 1776 vdev->vq[i].vring.desc = qemu_get_be64(f); 1777 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); 1778 vdev->vq[i].signalled_used_valid = false; 1779 vdev->vq[i].notification_disabled = 0; 1780 1781 if (vdev->vq[i].vring.desc) { 1782 /* XXX virtio-1 devices */ 1783 virtio_queue_update_rings(vdev, i); 1784 } else if (vdev->vq[i].last_avail_idx) { 1785 error_report("VQ %d address 0x0 " 1786 "inconsistent with Host index 0x%x", 1787 i, vdev->vq[i].last_avail_idx); 1788 return -1; 1789 } 1790 if (k->load_queue) { 1791 ret = k->load_queue(qbus->parent, i, f); 1792 if (ret) 1793 return ret; 1794 } 1795 } 1796 1797 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 1798 1799 if (vdc->load != NULL) { 1800 ret = vdc->load(vdev, f, version_id); 1801 if (ret) { 1802 return ret; 1803 } 1804 } 1805 1806 if (vdc->vmsd) { 1807 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); 1808 if (ret) { 1809 return ret; 1810 } 1811 } 1812 1813 /* Subsections */ 1814 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); 1815 if (ret) { 1816 return ret; 1817 } 1818 1819 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) { 1820 vdev->device_endian = virtio_default_endian(); 1821 } 1822 1823 if (virtio_64bit_features_needed(vdev)) { 1824 /* 1825 * Subsection load filled vdev->guest_features. Run them 1826 * through virtio_set_features to sanity-check them against 1827 * host_features. 1828 */ 1829 uint64_t features64 = vdev->guest_features; 1830 if (virtio_set_features_nocheck(vdev, features64) < 0) { 1831 error_report("Features 0x%" PRIx64 " unsupported. " 1832 "Allowed features: 0x%" PRIx64, 1833 features64, vdev->host_features); 1834 return -1; 1835 } 1836 } else { 1837 if (virtio_set_features_nocheck(vdev, features) < 0) { 1838 error_report("Features 0x%x unsupported. " 1839 "Allowed features: 0x%" PRIx64, 1840 features, vdev->host_features); 1841 return -1; 1842 } 1843 } 1844 1845 for (i = 0; i < num; i++) { 1846 if (vdev->vq[i].vring.desc) { 1847 uint16_t nheads; 1848 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; 1849 /* Check it isn't doing strange things with descriptor numbers. */ 1850 if (nheads > vdev->vq[i].vring.num) { 1851 error_report("VQ %d size 0x%x Guest index 0x%x " 1852 "inconsistent with Host index 0x%x: delta 0x%x", 1853 i, vdev->vq[i].vring.num, 1854 vring_avail_idx(&vdev->vq[i]), 1855 vdev->vq[i].last_avail_idx, nheads); 1856 return -1; 1857 } 1858 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); 1859 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); 1860 1861 /* 1862 * Some devices migrate VirtQueueElements that have been popped 1863 * from the avail ring but not yet returned to the used ring. 
1864 */ 1865 vdev->vq[i].inuse = vdev->vq[i].last_avail_idx - 1866 vdev->vq[i].used_idx; 1867 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) { 1868 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - " 1869 "used_idx 0x%x", 1870 i, vdev->vq[i].vring.num, 1871 vdev->vq[i].last_avail_idx, 1872 vdev->vq[i].used_idx); 1873 return -1; 1874 } 1875 } 1876 } 1877 1878 return 0; 1879 } 1880 1881 void virtio_cleanup(VirtIODevice *vdev) 1882 { 1883 qemu_del_vm_change_state_handler(vdev->vmstate); 1884 g_free(vdev->config); 1885 g_free(vdev->vq); 1886 g_free(vdev->vector_queues); 1887 } 1888 1889 static void virtio_vmstate_change(void *opaque, int running, RunState state) 1890 { 1891 VirtIODevice *vdev = opaque; 1892 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1893 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1894 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); 1895 vdev->vm_running = running; 1896 1897 if (backend_run) { 1898 virtio_set_status(vdev, vdev->status); 1899 } 1900 1901 if (k->vmstate_change) { 1902 k->vmstate_change(qbus->parent, backend_run); 1903 } 1904 1905 if (!backend_run) { 1906 virtio_set_status(vdev, vdev->status); 1907 } 1908 } 1909 1910 void virtio_instance_init_common(Object *proxy_obj, void *data, 1911 size_t vdev_size, const char *vdev_name) 1912 { 1913 DeviceState *vdev = data; 1914 1915 object_initialize(vdev, vdev_size, vdev_name); 1916 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL); 1917 object_unref(OBJECT(vdev)); 1918 qdev_alias_all_properties(vdev, proxy_obj); 1919 } 1920 1921 void virtio_init(VirtIODevice *vdev, const char *name, 1922 uint16_t device_id, size_t config_size) 1923 { 1924 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1925 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1926 int i; 1927 int nvectors = k->query_nvectors ? 
k->query_nvectors(qbus->parent) : 0; 1928 1929 if (nvectors) { 1930 vdev->vector_queues = 1931 g_malloc0(sizeof(*vdev->vector_queues) * nvectors); 1932 } 1933 1934 vdev->device_id = device_id; 1935 vdev->status = 0; 1936 atomic_set(&vdev->isr, 0); 1937 vdev->queue_sel = 0; 1938 vdev->config_vector = VIRTIO_NO_VECTOR; 1939 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); 1940 vdev->vm_running = runstate_is_running(); 1941 vdev->broken = false; 1942 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1943 vdev->vq[i].vector = VIRTIO_NO_VECTOR; 1944 vdev->vq[i].vdev = vdev; 1945 vdev->vq[i].queue_index = i; 1946 } 1947 1948 vdev->name = name; 1949 vdev->config_len = config_size; 1950 if (vdev->config_len) { 1951 vdev->config = g_malloc0(config_size); 1952 } else { 1953 vdev->config = NULL; 1954 } 1955 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, 1956 vdev); 1957 vdev->device_endian = virtio_default_endian(); 1958 vdev->use_guest_notifier_mask = true; 1959 } 1960 1961 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) 1962 { 1963 return vdev->vq[n].vring.desc; 1964 } 1965 1966 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) 1967 { 1968 return vdev->vq[n].vring.avail; 1969 } 1970 1971 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) 1972 { 1973 return vdev->vq[n].vring.used; 1974 } 1975 1976 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) 1977 { 1978 return sizeof(VRingDesc) * vdev->vq[n].vring.num; 1979 } 1980 1981 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) 1982 { 1983 return offsetof(VRingAvail, ring) + 1984 sizeof(uint16_t) * vdev->vq[n].vring.num; 1985 } 1986 1987 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) 1988 { 1989 return offsetof(VRingUsed, ring) + 1990 sizeof(VRingUsedElem) * vdev->vq[n].vring.num; 1991 } 1992 1993 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) 1994 { 1995 return vdev->vq[n].last_avail_idx; 1996 } 1997 1998 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) 1999 { 2000 vdev->vq[n].last_avail_idx = idx; 2001 vdev->vq[n].shadow_avail_idx = idx; 2002 } 2003 2004 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) 2005 { 2006 vdev->vq[n].signalled_used_valid = false; 2007 } 2008 2009 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) 2010 { 2011 return vdev->vq + n; 2012 } 2013 2014 uint16_t virtio_get_queue_index(VirtQueue *vq) 2015 { 2016 return vq->queue_index; 2017 } 2018 2019 static void virtio_queue_guest_notifier_read(EventNotifier *n) 2020 { 2021 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); 2022 if (event_notifier_test_and_clear(n)) { 2023 virtio_notify_vector(vq->vdev, vq->vector); 2024 } 2025 } 2026 2027 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, 2028 bool with_irqfd) 2029 { 2030 if (assign && !with_irqfd) { 2031 event_notifier_set_handler(&vq->guest_notifier, false, 2032 virtio_queue_guest_notifier_read); 2033 } else { 2034 event_notifier_set_handler(&vq->guest_notifier, false, NULL); 2035 } 2036 if (!assign) { 2037 /* Test and clear notifier before closing it, 2038 * in case poll callback didn't have time to run. 
         */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_aio_vq(vq);
    }
}

static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}

static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (virtio_queue_empty(vq)) {
        return false;
    }

    virtio_queue_notify_aio_vq(vq);
    return true;
}

static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}

void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                VirtIOHandleOutput handle_output)
{
    if (handle_output) {
        vq->handle_aio_output = handle_output;
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
                               virtio_queue_host_notifier_aio_read,
                               virtio_queue_host_notifier_aio_poll);
        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                    virtio_queue_host_notifier_aio_poll_begin,
                                    virtio_queue_host_notifier_aio_poll_end);
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
        vq->handle_aio_output = NULL;
    }
}

void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    vdev->broken = true;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
        virtio_notify_config(vdev);
    }
}
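/*
 * Illustrative sketch, not part of the original file: device models are
 * expected to report guest protocol violations through virtio_error()
 * instead of asserting or exiting.  The request-size check below is a
 * hypothetical caller.
 */
static bool example_check_request_size(VirtIODevice *vdev,
                                       size_t got, size_t need)
{
    if (got < need) {
        /* Marks the device broken; for VIRTIO_F_VERSION_1 devices it also
         * sets NEEDS_RESET and notifies the guest, as implemented above. */
        virtio_error(vdev, "request too small: %zu bytes, need %zu",
                     got, need);
        return false;
    }
    return true;
}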
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r, err;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier, true,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    return 0;

assign_error:
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, true, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    return err;
}

int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_start_ioeventfd(vbus);
}

static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, true, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
}

void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_stop_ioeventfd(vbus);
}

int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_grab_ioeventfd(vbus);
}

void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_release_ioeventfd(vbus);
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}

bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_ioeventfd_enabled(vbus);
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)
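/*
 * Illustrative sketch, not part of the original file: a concrete device is
 * implemented as a subtype of TYPE_VIRTIO_DEVICE.  The type name
 * "virtio-example", the device id 0x42 and the config size of 0 are
 * hypothetical; queues would normally be added with virtio_add_queue() and
 * the type registered with its own type_init() call, both omitted here.
 */
typedef struct VirtIOExample {
    VirtIODevice parent_obj;
} VirtIOExample;

static void virtio_example_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    /* Called by virtio_device_realize() above via vdc->realize */
    virtio_init(vdev, "virtio-example", 0x42, 0);
}

static void virtio_example_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    vdc->realize = virtio_example_device_realize;
}

static const TypeInfo virtio_example_info = {
    .name          = "virtio-example",
    .parent        = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOExample),
    .class_init    = virtio_example_class_init,
};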