1 /* 2 * Virtio Support 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qapi/error.h" 16 #include "qemu-common.h" 17 #include "cpu.h" 18 #include "trace.h" 19 #include "exec/address-spaces.h" 20 #include "qemu/error-report.h" 21 #include "hw/virtio/virtio.h" 22 #include "qemu/atomic.h" 23 #include "hw/virtio/virtio-bus.h" 24 #include "migration/migration.h" 25 #include "hw/virtio/virtio-access.h" 26 27 /* 28 * The alignment to use between consumer and producer parts of vring. 29 * x86 pagesize again. This is the default, used by transports like PCI 30 * which don't provide a means for the guest to tell the host the alignment. 31 */ 32 #define VIRTIO_PCI_VRING_ALIGN 4096 33 34 typedef struct VRingDesc 35 { 36 uint64_t addr; 37 uint32_t len; 38 uint16_t flags; 39 uint16_t next; 40 } VRingDesc; 41 42 typedef struct VRingAvail 43 { 44 uint16_t flags; 45 uint16_t idx; 46 uint16_t ring[0]; 47 } VRingAvail; 48 49 typedef struct VRingUsedElem 50 { 51 uint32_t id; 52 uint32_t len; 53 } VRingUsedElem; 54 55 typedef struct VRingUsed 56 { 57 uint16_t flags; 58 uint16_t idx; 59 VRingUsedElem ring[0]; 60 } VRingUsed; 61 62 typedef struct VRing 63 { 64 unsigned int num; 65 unsigned int num_default; 66 unsigned int align; 67 hwaddr desc; 68 hwaddr avail; 69 hwaddr used; 70 } VRing; 71 72 struct VirtQueue 73 { 74 VRing vring; 75 76 /* Next head to pop */ 77 uint16_t last_avail_idx; 78 79 /* Last avail_idx read from VQ. */ 80 uint16_t shadow_avail_idx; 81 82 uint16_t used_idx; 83 84 /* Last used index value we have signalled on */ 85 uint16_t signalled_used; 86 87 /* Whether the used index value we have signalled on is valid */ 88 bool signalled_used_valid; 89 90 /* Notification enabled?
*/ 91 bool notification; 92 93 uint16_t queue_index; 94 95 int inuse; 96 97 uint16_t vector; 98 VirtIOHandleOutput handle_output; 99 VirtIOHandleOutput handle_aio_output; 100 bool use_aio; 101 VirtIODevice *vdev; 102 EventNotifier guest_notifier; 103 EventNotifier host_notifier; 104 QLIST_ENTRY(VirtQueue) node; 105 }; 106 107 /* virt queue functions */ 108 void virtio_queue_update_rings(VirtIODevice *vdev, int n) 109 { 110 VRing *vring = &vdev->vq[n].vring; 111 112 if (!vring->desc) { 113 /* not yet setup -> nothing to do */ 114 return; 115 } 116 vring->avail = vring->desc + vring->num * sizeof(VRingDesc); 117 vring->used = vring_align(vring->avail + 118 offsetof(VRingAvail, ring[vring->num]), 119 vring->align); 120 } 121 122 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, 123 hwaddr desc_pa, int i) 124 { 125 address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc), 126 MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc)); 127 virtio_tswap64s(vdev, &desc->addr); 128 virtio_tswap32s(vdev, &desc->len); 129 virtio_tswap16s(vdev, &desc->flags); 130 virtio_tswap16s(vdev, &desc->next); 131 } 132 133 static inline uint16_t vring_avail_flags(VirtQueue *vq) 134 { 135 hwaddr pa; 136 pa = vq->vring.avail + offsetof(VRingAvail, flags); 137 return virtio_lduw_phys(vq->vdev, pa); 138 } 139 140 static inline uint16_t vring_avail_idx(VirtQueue *vq) 141 { 142 hwaddr pa; 143 pa = vq->vring.avail + offsetof(VRingAvail, idx); 144 vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa); 145 return vq->shadow_avail_idx; 146 } 147 148 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) 149 { 150 hwaddr pa; 151 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); 152 return virtio_lduw_phys(vq->vdev, pa); 153 } 154 155 static inline uint16_t vring_get_used_event(VirtQueue *vq) 156 { 157 return vring_avail_ring(vq, vq->vring.num); 158 } 159 160 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, 161 int i) 162 { 163 hwaddr pa; 164 virtio_tswap32s(vq->vdev, &uelem->id); 165 virtio_tswap32s(vq->vdev, &uelem->len); 166 pa = vq->vring.used + offsetof(VRingUsed, ring[i]); 167 address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED, 168 (void *)uelem, sizeof(VRingUsedElem)); 169 } 170 171 static uint16_t vring_used_idx(VirtQueue *vq) 172 { 173 hwaddr pa; 174 pa = vq->vring.used + offsetof(VRingUsed, idx); 175 return virtio_lduw_phys(vq->vdev, pa); 176 } 177 178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) 179 { 180 hwaddr pa; 181 pa = vq->vring.used + offsetof(VRingUsed, idx); 182 virtio_stw_phys(vq->vdev, pa, val); 183 vq->used_idx = val; 184 } 185 186 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) 187 { 188 VirtIODevice *vdev = vq->vdev; 189 hwaddr pa; 190 pa = vq->vring.used + offsetof(VRingUsed, flags); 191 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask); 192 } 193 194 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) 195 { 196 VirtIODevice *vdev = vq->vdev; 197 hwaddr pa; 198 pa = vq->vring.used + offsetof(VRingUsed, flags); 199 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask); 200 } 201 202 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) 203 { 204 hwaddr pa; 205 if (!vq->notification) { 206 return; 207 } 208 pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); 209 virtio_stw_phys(vq->vdev, pa, val); 210 } 211 212 void virtio_queue_set_notification(VirtQueue *vq, int enable) 213 { 214 vq->notification = enable; 
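/* With VIRTIO_RING_F_EVENT_IDX negotiated, the guest ignores the NO_NOTIFY flag; instead we publish the avail index we have seen so far as the avail event, asking the guest to notify us only once it adds buffers past that index. Without the feature we fall back to toggling VRING_USED_F_NO_NOTIFY in the used ring flags. */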
215 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { 216 vring_set_avail_event(vq, vring_avail_idx(vq)); 217 } else if (enable) { 218 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); 219 } else { 220 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); 221 } 222 if (enable) { 223 /* Expose avail event/used flags before caller checks the avail idx. */ 224 smp_mb(); 225 } 226 } 227 228 int virtio_queue_ready(VirtQueue *vq) 229 { 230 return vq->vring.avail != 0; 231 } 232 233 /* Fetch avail_idx from VQ memory only when we really need to know if 234 * guest has added some buffers. */ 235 int virtio_queue_empty(VirtQueue *vq) 236 { 237 if (vq->shadow_avail_idx != vq->last_avail_idx) { 238 return 0; 239 } 240 241 return vring_avail_idx(vq) == vq->last_avail_idx; 242 } 243 244 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 245 unsigned int len) 246 { 247 unsigned int offset; 248 int i; 249 250 offset = 0; 251 for (i = 0; i < elem->in_num; i++) { 252 size_t size = MIN(len - offset, elem->in_sg[i].iov_len); 253 254 cpu_physical_memory_unmap(elem->in_sg[i].iov_base, 255 elem->in_sg[i].iov_len, 256 1, size); 257 258 offset += size; 259 } 260 261 for (i = 0; i < elem->out_num; i++) 262 cpu_physical_memory_unmap(elem->out_sg[i].iov_base, 263 elem->out_sg[i].iov_len, 264 0, elem->out_sg[i].iov_len); 265 } 266 267 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, 268 unsigned int len) 269 { 270 vq->last_avail_idx--; 271 vq->inuse--; 272 virtqueue_unmap_sg(vq, elem, len); 273 } 274 275 /* virtqueue_rewind: 276 * @vq: The #VirtQueue 277 * @num: Number of elements to push back 278 * 279 * Pretend that elements weren't popped from the virtqueue. The next 280 * virtqueue_pop() will refetch the oldest element. 281 * 282 * Use virtqueue_discard() instead if you have a VirtQueueElement. 283 * 284 * Returns: true on success, false if @num is greater than the number of in use 285 * elements. 286 */ 287 bool virtqueue_rewind(VirtQueue *vq, unsigned int num) 288 { 289 if (num > vq->inuse) { 290 return false; 291 } 292 vq->last_avail_idx -= num; 293 vq->inuse -= num; 294 return true; 295 } 296 297 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, 298 unsigned int len, unsigned int idx) 299 { 300 VRingUsedElem uelem; 301 302 trace_virtqueue_fill(vq, elem, len, idx); 303 304 virtqueue_unmap_sg(vq, elem, len); 305 306 idx = (idx + vq->used_idx) % vq->vring.num; 307 308 uelem.id = elem->index; 309 uelem.len = len; 310 vring_used_write(vq, &uelem, idx); 311 } 312 313 void virtqueue_flush(VirtQueue *vq, unsigned int count) 314 { 315 uint16_t old, new; 316 /* Make sure buffer is written before we update index. */ 317 smp_wmb(); 318 trace_virtqueue_flush(vq, count); 319 old = vq->used_idx; 320 new = old + count; 321 vring_used_idx_set(vq, new); 322 vq->inuse -= count; 323 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) 324 vq->signalled_used_valid = false; 325 } 326 327 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, 328 unsigned int len) 329 { 330 virtqueue_fill(vq, elem, len, 0); 331 virtqueue_flush(vq, 1); 332 } 333 334 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) 335 { 336 uint16_t num_heads = vring_avail_idx(vq) - idx; 337 338 /* Check it isn't doing very strange things with descriptor numbers. 
*/ 339 if (num_heads > vq->vring.num) { 340 error_report("Guest moved used index from %u to %u", 341 idx, vq->shadow_avail_idx); 342 exit(1); 343 } 344 /* On success, callers read a descriptor at vq->last_avail_idx. 345 * Make sure descriptor read does not bypass avail index read. */ 346 if (num_heads) { 347 smp_rmb(); 348 } 349 350 return num_heads; 351 } 352 353 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) 354 { 355 unsigned int head; 356 357 /* Grab the next descriptor number they're advertising, and increment 358 * the index we've seen. */ 359 head = vring_avail_ring(vq, idx % vq->vring.num); 360 361 /* If their number is silly, that's a fatal mistake. */ 362 if (head >= vq->vring.num) { 363 error_report("Guest says index %u is available", head); 364 exit(1); 365 } 366 367 return head; 368 } 369 370 static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, 371 hwaddr desc_pa, unsigned int max) 372 { 373 unsigned int next; 374 375 /* If this descriptor says it doesn't chain, we're done. */ 376 if (!(desc->flags & VRING_DESC_F_NEXT)) { 377 return max; 378 } 379 380 /* Check they're not leading us off end of descriptors. */ 381 next = desc->next; 382 /* Make sure compiler knows to grab that: we don't want it changing! */ 383 smp_wmb(); 384 385 if (next >= max) { 386 error_report("Desc next is %u", next); 387 exit(1); 388 } 389 390 vring_desc_read(vdev, desc, desc_pa, next); 391 return next; 392 } 393 394 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, 395 unsigned int *out_bytes, 396 unsigned max_in_bytes, unsigned max_out_bytes) 397 { 398 unsigned int idx; 399 unsigned int total_bufs, in_total, out_total; 400 401 idx = vq->last_avail_idx; 402 403 total_bufs = in_total = out_total = 0; 404 while (virtqueue_num_heads(vq, idx)) { 405 VirtIODevice *vdev = vq->vdev; 406 unsigned int max, num_bufs, indirect = 0; 407 VRingDesc desc; 408 hwaddr desc_pa; 409 int i; 410 411 max = vq->vring.num; 412 num_bufs = total_bufs; 413 i = virtqueue_get_head(vq, idx++); 414 desc_pa = vq->vring.desc; 415 vring_desc_read(vdev, &desc, desc_pa, i); 416 417 if (desc.flags & VRING_DESC_F_INDIRECT) { 418 if (desc.len % sizeof(VRingDesc)) { 419 error_report("Invalid size for indirect buffer table"); 420 exit(1); 421 } 422 423 /* If we've got too many, that implies a descriptor loop. */ 424 if (num_bufs >= max) { 425 error_report("Looped descriptor"); 426 exit(1); 427 } 428 429 /* loop over the indirect descriptor table */ 430 indirect = 1; 431 max = desc.len / sizeof(VRingDesc); 432 desc_pa = desc.addr; 433 num_bufs = i = 0; 434 vring_desc_read(vdev, &desc, desc_pa, i); 435 } 436 437 do { 438 /* If we've got too many, that implies a descriptor loop. 
*/ 439 if (++num_bufs > max) { 440 error_report("Looped descriptor"); 441 exit(1); 442 } 443 444 if (desc.flags & VRING_DESC_F_WRITE) { 445 in_total += desc.len; 446 } else { 447 out_total += desc.len; 448 } 449 if (in_total >= max_in_bytes && out_total >= max_out_bytes) { 450 goto done; 451 } 452 } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); 453 454 if (!indirect) 455 total_bufs = num_bufs; 456 else 457 total_bufs++; 458 } 459 done: 460 if (in_bytes) { 461 *in_bytes = in_total; 462 } 463 if (out_bytes) { 464 *out_bytes = out_total; 465 } 466 } 467 468 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, 469 unsigned int out_bytes) 470 { 471 unsigned int in_total, out_total; 472 473 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes); 474 return in_bytes <= in_total && out_bytes <= out_total; 475 } 476 477 static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov, 478 unsigned int max_num_sg, bool is_write, 479 hwaddr pa, size_t sz) 480 { 481 unsigned num_sg = *p_num_sg; 482 assert(num_sg <= max_num_sg); 483 484 if (!sz) { 485 error_report("virtio: zero sized buffers are not allowed"); 486 exit(1); 487 } 488 489 while (sz) { 490 hwaddr len = sz; 491 492 if (num_sg == max_num_sg) { 493 error_report("virtio: too many write descriptors in indirect table"); 494 exit(1); 495 } 496 497 iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write); 498 iov[num_sg].iov_len = len; 499 addr[num_sg] = pa; 500 501 sz -= len; 502 pa += len; 503 num_sg++; 504 } 505 *p_num_sg = num_sg; 506 } 507 508 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, 509 unsigned int *num_sg, unsigned int max_size, 510 int is_write) 511 { 512 unsigned int i; 513 hwaddr len; 514 515 /* Note: this function MUST validate input, some callers 516 * are passing in num_sg values received over the network. 517 */ 518 /* TODO: teach all callers that this can fail, and return failure instead 519 * of asserting here. 520 * When we do, we might be able to re-enable NDEBUG below. 
521 */ 522 #ifdef NDEBUG 523 #error building with NDEBUG is not supported 524 #endif 525 assert(*num_sg <= max_size); 526 527 for (i = 0; i < *num_sg; i++) { 528 len = sg[i].iov_len; 529 sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); 530 if (!sg[i].iov_base) { 531 error_report("virtio: error trying to map MMIO memory"); 532 exit(1); 533 } 534 if (len != sg[i].iov_len) { 535 error_report("virtio: unexpected memory split"); 536 exit(1); 537 } 538 } 539 } 540 541 void virtqueue_map(VirtQueueElement *elem) 542 { 543 virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, 544 VIRTQUEUE_MAX_SIZE, 1); 545 virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, 546 VIRTQUEUE_MAX_SIZE, 0); 547 } 548 549 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) 550 { 551 VirtQueueElement *elem; 552 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); 553 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); 554 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); 555 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); 556 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); 557 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); 558 559 assert(sz >= sizeof(VirtQueueElement)); 560 elem = g_malloc(out_sg_end); 561 elem->out_num = out_num; 562 elem->in_num = in_num; 563 elem->in_addr = (void *)elem + in_addr_ofs; 564 elem->out_addr = (void *)elem + out_addr_ofs; 565 elem->in_sg = (void *)elem + in_sg_ofs; 566 elem->out_sg = (void *)elem + out_sg_ofs; 567 return elem; 568 } 569 570 void *virtqueue_pop(VirtQueue *vq, size_t sz) 571 { 572 unsigned int i, head, max; 573 hwaddr desc_pa = vq->vring.desc; 574 VirtIODevice *vdev = vq->vdev; 575 VirtQueueElement *elem; 576 unsigned out_num, in_num; 577 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 578 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 579 VRingDesc desc; 580 581 if (virtio_queue_empty(vq)) { 582 return NULL; 583 } 584 /* Needed after virtio_queue_empty(), see comment in 585 * virtqueue_num_heads(). */ 586 smp_rmb(); 587 588 /* When we start there are none of either input nor output. */ 589 out_num = in_num = 0; 590 591 max = vq->vring.num; 592 593 if (vq->inuse >= vq->vring.num) { 594 error_report("Virtqueue size exceeded"); 595 exit(1); 596 } 597 598 i = head = virtqueue_get_head(vq, vq->last_avail_idx++); 599 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 600 vring_set_avail_event(vq, vq->last_avail_idx); 601 } 602 603 vring_desc_read(vdev, &desc, desc_pa, i); 604 if (desc.flags & VRING_DESC_F_INDIRECT) { 605 if (desc.len % sizeof(VRingDesc)) { 606 error_report("Invalid size for indirect buffer table"); 607 exit(1); 608 } 609 610 /* loop over the indirect descriptor table */ 611 max = desc.len / sizeof(VRingDesc); 612 desc_pa = desc.addr; 613 i = 0; 614 vring_desc_read(vdev, &desc, desc_pa, i); 615 } 616 617 /* Collect all the descriptors */ 618 do { 619 if (desc.flags & VRING_DESC_F_WRITE) { 620 virtqueue_map_desc(&in_num, addr + out_num, iov + out_num, 621 VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len); 622 } else { 623 if (in_num) { 624 error_report("Incorrect order for descriptors"); 625 exit(1); 626 } 627 virtqueue_map_desc(&out_num, addr, iov, 628 VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len); 629 } 630 631 /* If we've got too many, that implies a descriptor loop. 
*/ 632 if ((in_num + out_num) > max) { 633 error_report("Looped descriptor"); 634 exit(1); 635 } 636 } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); 637 638 /* Now copy what we have collected and mapped */ 639 elem = virtqueue_alloc_element(sz, out_num, in_num); 640 elem->index = head; 641 for (i = 0; i < out_num; i++) { 642 elem->out_addr[i] = addr[i]; 643 elem->out_sg[i] = iov[i]; 644 } 645 for (i = 0; i < in_num; i++) { 646 elem->in_addr[i] = addr[out_num + i]; 647 elem->in_sg[i] = iov[out_num + i]; 648 } 649 650 vq->inuse++; 651 652 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 653 return elem; 654 } 655 656 /* Reading and writing a structure directly to QEMUFile is *awful*, but 657 * it is what QEMU has always done by mistake. We can change it sooner 658 * or later by bumping the version number of the affected vm states. 659 * In the meanwhile, since the in-memory layout of VirtQueueElement 660 * has changed, we need to marshal to and from the layout that was 661 * used before the change. 662 */ 663 typedef struct VirtQueueElementOld { 664 unsigned int index; 665 unsigned int out_num; 666 unsigned int in_num; 667 hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; 668 hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; 669 struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; 670 struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; 671 } VirtQueueElementOld; 672 673 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz) 674 { 675 VirtQueueElement *elem; 676 VirtQueueElementOld data; 677 int i; 678 679 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 680 681 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); 682 elem->index = data.index; 683 684 for (i = 0; i < elem->in_num; i++) { 685 elem->in_addr[i] = data.in_addr[i]; 686 } 687 688 for (i = 0; i < elem->out_num; i++) { 689 elem->out_addr[i] = data.out_addr[i]; 690 } 691 692 for (i = 0; i < elem->in_num; i++) { 693 /* Base is overwritten by virtqueue_map. */ 694 elem->in_sg[i].iov_base = 0; 695 elem->in_sg[i].iov_len = data.in_sg[i].iov_len; 696 } 697 698 for (i = 0; i < elem->out_num; i++) { 699 /* Base is overwritten by virtqueue_map. */ 700 elem->out_sg[i].iov_base = 0; 701 elem->out_sg[i].iov_len = data.out_sg[i].iov_len; 702 } 703 704 virtqueue_map(elem); 705 return elem; 706 } 707 708 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) 709 { 710 VirtQueueElementOld data; 711 int i; 712 713 memset(&data, 0, sizeof(data)); 714 data.index = elem->index; 715 data.in_num = elem->in_num; 716 data.out_num = elem->out_num; 717 718 for (i = 0; i < elem->in_num; i++) { 719 data.in_addr[i] = elem->in_addr[i]; 720 } 721 722 for (i = 0; i < elem->out_num; i++) { 723 data.out_addr[i] = elem->out_addr[i]; 724 } 725 726 for (i = 0; i < elem->in_num; i++) { 727 /* Base is overwritten by virtqueue_map when loading. Do not 728 * save it, as it would leak the QEMU address space layout. */ 729 data.in_sg[i].iov_len = elem->in_sg[i].iov_len; 730 } 731 732 for (i = 0; i < elem->out_num; i++) { 733 /* Do not save iov_base as above. 
*/ 734 data.out_sg[i].iov_len = elem->out_sg[i].iov_len; 735 } 736 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); 737 } 738 739 /* virtio device */ 740 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) 741 { 742 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 743 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 744 745 if (k->notify) { 746 k->notify(qbus->parent, vector); 747 } 748 } 749 750 void virtio_update_irq(VirtIODevice *vdev) 751 { 752 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 753 } 754 755 static int virtio_validate_features(VirtIODevice *vdev) 756 { 757 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 758 759 if (k->validate_features) { 760 return k->validate_features(vdev); 761 } else { 762 return 0; 763 } 764 } 765 766 int virtio_set_status(VirtIODevice *vdev, uint8_t val) 767 { 768 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 769 trace_virtio_set_status(vdev, val); 770 771 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 772 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && 773 val & VIRTIO_CONFIG_S_FEATURES_OK) { 774 int ret = virtio_validate_features(vdev); 775 776 if (ret) { 777 return ret; 778 } 779 } 780 } 781 if (k->set_status) { 782 k->set_status(vdev, val); 783 } 784 vdev->status = val; 785 return 0; 786 } 787 788 bool target_words_bigendian(void); 789 static enum virtio_device_endian virtio_default_endian(void) 790 { 791 if (target_words_bigendian()) { 792 return VIRTIO_DEVICE_ENDIAN_BIG; 793 } else { 794 return VIRTIO_DEVICE_ENDIAN_LITTLE; 795 } 796 } 797 798 static enum virtio_device_endian virtio_current_cpu_endian(void) 799 { 800 CPUClass *cc = CPU_GET_CLASS(current_cpu); 801 802 if (cc->virtio_is_big_endian(current_cpu)) { 803 return VIRTIO_DEVICE_ENDIAN_BIG; 804 } else { 805 return VIRTIO_DEVICE_ENDIAN_LITTLE; 806 } 807 } 808 809 void virtio_reset(void *opaque) 810 { 811 VirtIODevice *vdev = opaque; 812 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 813 int i; 814 815 virtio_set_status(vdev, 0); 816 if (current_cpu) { 817 /* Guest initiated reset */ 818 vdev->device_endian = virtio_current_cpu_endian(); 819 } else { 820 /* System reset */ 821 vdev->device_endian = virtio_default_endian(); 822 } 823 824 if (k->reset) { 825 k->reset(vdev); 826 } 827 828 vdev->guest_features = 0; 829 vdev->queue_sel = 0; 830 vdev->status = 0; 831 vdev->isr = 0; 832 vdev->config_vector = VIRTIO_NO_VECTOR; 833 virtio_notify_vector(vdev, vdev->config_vector); 834 835 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { 836 vdev->vq[i].vring.desc = 0; 837 vdev->vq[i].vring.avail = 0; 838 vdev->vq[i].vring.used = 0; 839 vdev->vq[i].last_avail_idx = 0; 840 vdev->vq[i].shadow_avail_idx = 0; 841 vdev->vq[i].used_idx = 0; 842 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); 843 vdev->vq[i].signalled_used = 0; 844 vdev->vq[i].signalled_used_valid = false; 845 vdev->vq[i].notification = true; 846 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; 847 vdev->vq[i].inuse = 0; 848 } 849 } 850 851 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) 852 { 853 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 854 uint8_t val; 855 856 if (addr + sizeof(val) > vdev->config_len) { 857 return (uint32_t)-1; 858 } 859 860 k->get_config(vdev, vdev->config); 861 862 val = ldub_p(vdev->config + addr); 863 return val; 864 } 865 866 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr) 867 { 868 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 869 uint16_t val; 870 871 if (addr + sizeof(val) > 
vdev->config_len) { 872 return (uint32_t)-1; 873 } 874 875 k->get_config(vdev, vdev->config); 876 877 val = lduw_p(vdev->config + addr); 878 return val; 879 } 880 881 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr) 882 { 883 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 884 uint32_t val; 885 886 if (addr + sizeof(val) > vdev->config_len) { 887 return (uint32_t)-1; 888 } 889 890 k->get_config(vdev, vdev->config); 891 892 val = ldl_p(vdev->config + addr); 893 return val; 894 } 895 896 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data) 897 { 898 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 899 uint8_t val = data; 900 901 if (addr + sizeof(val) > vdev->config_len) { 902 return; 903 } 904 905 stb_p(vdev->config + addr, val); 906 907 if (k->set_config) { 908 k->set_config(vdev, vdev->config); 909 } 910 } 911 912 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data) 913 { 914 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 915 uint16_t val = data; 916 917 if (addr + sizeof(val) > vdev->config_len) { 918 return; 919 } 920 921 stw_p(vdev->config + addr, val); 922 923 if (k->set_config) { 924 k->set_config(vdev, vdev->config); 925 } 926 } 927 928 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data) 929 { 930 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 931 uint32_t val = data; 932 933 if (addr + sizeof(val) > vdev->config_len) { 934 return; 935 } 936 937 stl_p(vdev->config + addr, val); 938 939 if (k->set_config) { 940 k->set_config(vdev, vdev->config); 941 } 942 } 943 944 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr) 945 { 946 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 947 uint8_t val; 948 949 if (addr + sizeof(val) > vdev->config_len) { 950 return (uint32_t)-1; 951 } 952 953 k->get_config(vdev, vdev->config); 954 955 val = ldub_p(vdev->config + addr); 956 return val; 957 } 958 959 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr) 960 { 961 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 962 uint16_t val; 963 964 if (addr + sizeof(val) > vdev->config_len) { 965 return (uint32_t)-1; 966 } 967 968 k->get_config(vdev, vdev->config); 969 970 val = lduw_le_p(vdev->config + addr); 971 return val; 972 } 973 974 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr) 975 { 976 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 977 uint32_t val; 978 979 if (addr + sizeof(val) > vdev->config_len) { 980 return (uint32_t)-1; 981 } 982 983 k->get_config(vdev, vdev->config); 984 985 val = ldl_le_p(vdev->config + addr); 986 return val; 987 } 988 989 void virtio_config_modern_writeb(VirtIODevice *vdev, 990 uint32_t addr, uint32_t data) 991 { 992 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 993 uint8_t val = data; 994 995 if (addr + sizeof(val) > vdev->config_len) { 996 return; 997 } 998 999 stb_p(vdev->config + addr, val); 1000 1001 if (k->set_config) { 1002 k->set_config(vdev, vdev->config); 1003 } 1004 } 1005 1006 void virtio_config_modern_writew(VirtIODevice *vdev, 1007 uint32_t addr, uint32_t data) 1008 { 1009 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1010 uint16_t val = data; 1011 1012 if (addr + sizeof(val) > vdev->config_len) { 1013 return; 1014 } 1015 1016 stw_le_p(vdev->config + addr, val); 1017 1018 if (k->set_config) { 1019 k->set_config(vdev, vdev->config); 1020 } 1021 } 1022 1023 void virtio_config_modern_writel(VirtIODevice *vdev, 1024 uint32_t addr, uint32_t data) 1025 { 1026 
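/* Modern (VIRTIO 1.0) config space is always little-endian, hence the stl_le_p() below rather than the guest-endian stl_p() used by the legacy accessor earlier in this file. */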
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1027 uint32_t val = data; 1028 1029 if (addr + sizeof(val) > vdev->config_len) { 1030 return; 1031 } 1032 1033 stl_le_p(vdev->config + addr, val); 1034 1035 if (k->set_config) { 1036 k->set_config(vdev, vdev->config); 1037 } 1038 } 1039 1040 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) 1041 { 1042 vdev->vq[n].vring.desc = addr; 1043 virtio_queue_update_rings(vdev, n); 1044 } 1045 1046 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n) 1047 { 1048 return vdev->vq[n].vring.desc; 1049 } 1050 1051 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, 1052 hwaddr avail, hwaddr used) 1053 { 1054 vdev->vq[n].vring.desc = desc; 1055 vdev->vq[n].vring.avail = avail; 1056 vdev->vq[n].vring.used = used; 1057 } 1058 1059 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) 1060 { 1061 /* Don't allow guest to flip queue between existent and 1062 * nonexistent states, or to set it to an invalid size. 1063 */ 1064 if (!!num != !!vdev->vq[n].vring.num || 1065 num > VIRTQUEUE_MAX_SIZE || 1066 num < 0) { 1067 return; 1068 } 1069 vdev->vq[n].vring.num = num; 1070 } 1071 1072 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector) 1073 { 1074 return QLIST_FIRST(&vdev->vector_queues[vector]); 1075 } 1076 1077 VirtQueue *virtio_vector_next_queue(VirtQueue *vq) 1078 { 1079 return QLIST_NEXT(vq, node); 1080 } 1081 1082 int virtio_queue_get_num(VirtIODevice *vdev, int n) 1083 { 1084 return vdev->vq[n].vring.num; 1085 } 1086 1087 int virtio_get_num_queues(VirtIODevice *vdev) 1088 { 1089 int i; 1090 1091 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1092 if (!virtio_queue_get_num(vdev, i)) { 1093 break; 1094 } 1095 } 1096 1097 return i; 1098 } 1099 1100 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) 1101 { 1102 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1103 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1104 1105 /* virtio-1 compliant devices cannot change the alignment */ 1106 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1107 error_report("tried to modify queue alignment for virtio-1 device"); 1108 return; 1109 } 1110 /* Check that the transport told us it was going to do this 1111 * (so a buggy transport will immediately assert rather than 1112 * silently failing to migrate this state) 1113 */ 1114 assert(k->has_variable_vring_alignment); 1115 1116 vdev->vq[n].vring.align = align; 1117 virtio_queue_update_rings(vdev, n); 1118 } 1119 1120 static void virtio_queue_notify_aio_vq(VirtQueue *vq) 1121 { 1122 if (vq->vring.desc && vq->handle_aio_output) { 1123 VirtIODevice *vdev = vq->vdev; 1124 1125 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1126 vq->handle_aio_output(vdev, vq); 1127 } 1128 } 1129 1130 static void virtio_queue_notify_vq(VirtQueue *vq) 1131 { 1132 if (vq->vring.desc && vq->handle_output) { 1133 VirtIODevice *vdev = vq->vdev; 1134 1135 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); 1136 vq->handle_output(vdev, vq); 1137 } 1138 } 1139 1140 void virtio_queue_notify(VirtIODevice *vdev, int n) 1141 { 1142 virtio_queue_notify_vq(&vdev->vq[n]); 1143 } 1144 1145 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) 1146 { 1147 return n < VIRTIO_QUEUE_MAX ? 
vdev->vq[n].vector : 1148 VIRTIO_NO_VECTOR; 1149 } 1150 1151 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) 1152 { 1153 VirtQueue *vq = &vdev->vq[n]; 1154 1155 if (n < VIRTIO_QUEUE_MAX) { 1156 if (vdev->vector_queues && 1157 vdev->vq[n].vector != VIRTIO_NO_VECTOR) { 1158 QLIST_REMOVE(vq, node); 1159 } 1160 vdev->vq[n].vector = vector; 1161 if (vdev->vector_queues && 1162 vector != VIRTIO_NO_VECTOR) { 1163 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node); 1164 } 1165 } 1166 } 1167 1168 static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, 1169 VirtIOHandleOutput handle_output, 1170 bool use_aio) 1171 { 1172 int i; 1173 1174 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1175 if (vdev->vq[i].vring.num == 0) 1176 break; 1177 } 1178 1179 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) 1180 abort(); 1181 1182 vdev->vq[i].vring.num = queue_size; 1183 vdev->vq[i].vring.num_default = queue_size; 1184 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; 1185 vdev->vq[i].handle_output = handle_output; 1186 vdev->vq[i].handle_aio_output = NULL; 1187 vdev->vq[i].use_aio = use_aio; 1188 1189 return &vdev->vq[i]; 1190 } 1191 1192 /* Add a virt queue and mark AIO. 1193 * An AIO queue will use the AioContext based event interface instead of the 1194 * default IOHandler and EventNotifier interface. 1195 */ 1196 VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, 1197 VirtIOHandleOutput handle_output) 1198 { 1199 return virtio_add_queue_internal(vdev, queue_size, handle_output, true); 1200 } 1201 1202 /* Add a normal virt queue (as opposed to the AIO version above). */ 1203 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, 1204 VirtIOHandleOutput handle_output) 1205 { 1206 return virtio_add_queue_internal(vdev, queue_size, handle_output, false); 1207 } 1208 1209 void virtio_del_queue(VirtIODevice *vdev, int n) 1210 { 1211 if (n < 0 || n >= VIRTIO_QUEUE_MAX) { 1212 abort(); 1213 } 1214 1215 vdev->vq[n].vring.num = 0; 1216 vdev->vq[n].vring.num_default = 0; 1217 } 1218 1219 void virtio_irq(VirtQueue *vq) 1220 { 1221 trace_virtio_irq(vq); 1222 vq->vdev->isr |= 0x01; 1223 virtio_notify_vector(vq->vdev, vq->vector); 1224 } 1225 1226 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) 1227 { 1228 uint16_t old, new; 1229 bool v; 1230 /* We need to expose used array entries before checking used event. 
*/ 1231 smp_mb(); 1232 /* Always notify when queue is empty (if the feature has been acknowledged) */ 1233 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && 1234 !vq->inuse && virtio_queue_empty(vq)) { 1235 return true; 1236 } 1237 1238 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 1239 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); 1240 } 1241 1242 v = vq->signalled_used_valid; 1243 vq->signalled_used_valid = true; 1244 old = vq->signalled_used; 1245 new = vq->signalled_used = vq->used_idx; 1246 return !v || vring_need_event(vring_get_used_event(vq), new, old); 1247 } 1248 1249 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) 1250 { 1251 if (!virtio_should_notify(vdev, vq)) { 1252 return; 1253 } 1254 1255 trace_virtio_notify(vdev, vq); 1256 vdev->isr |= 0x01; 1257 virtio_notify_vector(vdev, vq->vector); 1258 } 1259 1260 void virtio_notify_config(VirtIODevice *vdev) 1261 { 1262 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1263 return; 1264 1265 vdev->isr |= 0x03; 1266 vdev->generation++; 1267 virtio_notify_vector(vdev, vdev->config_vector); 1268 } 1269 1270 static bool virtio_device_endian_needed(void *opaque) 1271 { 1272 VirtIODevice *vdev = opaque; 1273 1274 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN); 1275 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { 1276 return vdev->device_endian != virtio_default_endian(); 1277 } 1278 /* Devices conforming to VIRTIO 1.0 or later are always LE. */ 1279 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE; 1280 } 1281 1282 static bool virtio_64bit_features_needed(void *opaque) 1283 { 1284 VirtIODevice *vdev = opaque; 1285 1286 return (vdev->host_features >> 32) != 0; 1287 } 1288 1289 static bool virtio_virtqueue_needed(void *opaque) 1290 { 1291 VirtIODevice *vdev = opaque; 1292 1293 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); 1294 } 1295 1296 static bool virtio_ringsize_needed(void *opaque) 1297 { 1298 VirtIODevice *vdev = opaque; 1299 int i; 1300 1301 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1302 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) { 1303 return true; 1304 } 1305 } 1306 return false; 1307 } 1308 1309 static bool virtio_extra_state_needed(void *opaque) 1310 { 1311 VirtIODevice *vdev = opaque; 1312 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1313 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1314 1315 return k->has_extra_state && 1316 k->has_extra_state(qbus->parent); 1317 } 1318 1319 static const VMStateDescription vmstate_virtqueue = { 1320 .name = "virtqueue_state", 1321 .version_id = 1, 1322 .minimum_version_id = 1, 1323 .fields = (VMStateField[]) { 1324 VMSTATE_UINT64(vring.avail, struct VirtQueue), 1325 VMSTATE_UINT64(vring.used, struct VirtQueue), 1326 VMSTATE_END_OF_LIST() 1327 } 1328 }; 1329 1330 static const VMStateDescription vmstate_virtio_virtqueues = { 1331 .name = "virtio/virtqueues", 1332 .version_id = 1, 1333 .minimum_version_id = 1, 1334 .needed = &virtio_virtqueue_needed, 1335 .fields = (VMStateField[]) { 1336 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1337 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), 1338 VMSTATE_END_OF_LIST() 1339 } 1340 }; 1341 1342 static const VMStateDescription vmstate_ringsize = { 1343 .name = "ringsize_state", 1344 .version_id = 1, 1345 .minimum_version_id = 1, 1346 .fields = (VMStateField[]) { 1347 VMSTATE_UINT32(vring.num_default, struct VirtQueue), 1348 VMSTATE_END_OF_LIST() 1349 } 1350 }; 1351 1352 static const VMStateDescription vmstate_virtio_ringsize = { 
1353 .name = "virtio/ringsize", 1354 .version_id = 1, 1355 .minimum_version_id = 1, 1356 .needed = &virtio_ringsize_needed, 1357 .fields = (VMStateField[]) { 1358 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, 1359 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), 1360 VMSTATE_END_OF_LIST() 1361 } 1362 }; 1363 1364 static int get_extra_state(QEMUFile *f, void *pv, size_t size) 1365 { 1366 VirtIODevice *vdev = pv; 1367 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1368 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1369 1370 if (!k->load_extra_state) { 1371 return -1; 1372 } else { 1373 return k->load_extra_state(qbus->parent, f); 1374 } 1375 } 1376 1377 static void put_extra_state(QEMUFile *f, void *pv, size_t size) 1378 { 1379 VirtIODevice *vdev = pv; 1380 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1381 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1382 1383 k->save_extra_state(qbus->parent, f); 1384 } 1385 1386 static const VMStateInfo vmstate_info_extra_state = { 1387 .name = "virtqueue_extra_state", 1388 .get = get_extra_state, 1389 .put = put_extra_state, 1390 }; 1391 1392 static const VMStateDescription vmstate_virtio_extra_state = { 1393 .name = "virtio/extra_state", 1394 .version_id = 1, 1395 .minimum_version_id = 1, 1396 .needed = &virtio_extra_state_needed, 1397 .fields = (VMStateField[]) { 1398 { 1399 .name = "extra_state", 1400 .version_id = 0, 1401 .field_exists = NULL, 1402 .size = 0, 1403 .info = &vmstate_info_extra_state, 1404 .flags = VMS_SINGLE, 1405 .offset = 0, 1406 }, 1407 VMSTATE_END_OF_LIST() 1408 } 1409 }; 1410 1411 static const VMStateDescription vmstate_virtio_device_endian = { 1412 .name = "virtio/device_endian", 1413 .version_id = 1, 1414 .minimum_version_id = 1, 1415 .needed = &virtio_device_endian_needed, 1416 .fields = (VMStateField[]) { 1417 VMSTATE_UINT8(device_endian, VirtIODevice), 1418 VMSTATE_END_OF_LIST() 1419 } 1420 }; 1421 1422 static const VMStateDescription vmstate_virtio_64bit_features = { 1423 .name = "virtio/64bit_features", 1424 .version_id = 1, 1425 .minimum_version_id = 1, 1426 .needed = &virtio_64bit_features_needed, 1427 .fields = (VMStateField[]) { 1428 VMSTATE_UINT64(guest_features, VirtIODevice), 1429 VMSTATE_END_OF_LIST() 1430 } 1431 }; 1432 1433 static const VMStateDescription vmstate_virtio = { 1434 .name = "virtio", 1435 .version_id = 1, 1436 .minimum_version_id = 1, 1437 .minimum_version_id_old = 1, 1438 .fields = (VMStateField[]) { 1439 VMSTATE_END_OF_LIST() 1440 }, 1441 .subsections = (const VMStateDescription*[]) { 1442 &vmstate_virtio_device_endian, 1443 &vmstate_virtio_64bit_features, 1444 &vmstate_virtio_virtqueues, 1445 &vmstate_virtio_ringsize, 1446 &vmstate_virtio_extra_state, 1447 NULL 1448 } 1449 }; 1450 1451 void virtio_save(VirtIODevice *vdev, QEMUFile *f) 1452 { 1453 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1454 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1455 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1456 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); 1457 int i; 1458 1459 if (k->save_config) { 1460 k->save_config(qbus->parent, f); 1461 } 1462 1463 qemu_put_8s(f, &vdev->status); 1464 qemu_put_8s(f, &vdev->isr); 1465 qemu_put_be16s(f, &vdev->queue_sel); 1466 qemu_put_be32s(f, &guest_features_lo); 1467 qemu_put_be32(f, vdev->config_len); 1468 qemu_put_buffer(f, vdev->config, vdev->config_len); 1469 1470 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1471 if (vdev->vq[i].vring.num == 0) 1472 break; 1473 } 1474 1475 qemu_put_be32(f, i); 1476 1477 
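/* Save per-queue state for each active queue: ring size, optional ring alignment, descriptor table address and our last_avail_idx, plus any transport-specific queue state. */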
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1478 if (vdev->vq[i].vring.num == 0) 1479 break; 1480 1481 qemu_put_be32(f, vdev->vq[i].vring.num); 1482 if (k->has_variable_vring_alignment) { 1483 qemu_put_be32(f, vdev->vq[i].vring.align); 1484 } 1485 /* XXX virtio-1 devices */ 1486 qemu_put_be64(f, vdev->vq[i].vring.desc); 1487 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); 1488 if (k->save_queue) { 1489 k->save_queue(qbus->parent, i, f); 1490 } 1491 } 1492 1493 if (vdc->save != NULL) { 1494 vdc->save(vdev, f); 1495 } 1496 1497 /* Subsections */ 1498 vmstate_save_state(f, &vmstate_virtio, vdev, NULL); 1499 } 1500 1501 /* A wrapper for use as a VMState .put function */ 1502 void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size) 1503 { 1504 virtio_save(VIRTIO_DEVICE(opaque), f); 1505 } 1506 1507 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) 1508 { 1509 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); 1510 bool bad = (val & ~(vdev->host_features)) != 0; 1511 1512 val &= vdev->host_features; 1513 if (k->set_features) { 1514 k->set_features(vdev, val); 1515 } 1516 vdev->guest_features = val; 1517 return bad ? -1 : 0; 1518 } 1519 1520 int virtio_set_features(VirtIODevice *vdev, uint64_t val) 1521 { 1522 /* 1523 * The driver must not attempt to set features after feature negotiation 1524 * has finished. 1525 */ 1526 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { 1527 return -EINVAL; 1528 } 1529 return virtio_set_features_nocheck(vdev, val); 1530 } 1531 1532 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) 1533 { 1534 int i, ret; 1535 int32_t config_len; 1536 uint32_t num; 1537 uint32_t features; 1538 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1539 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1540 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); 1541 1542 /* 1543 * We poison the endianness to ensure it does not get used before 1544 * subsections have been loaded. 1545 */ 1546 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN; 1547 1548 if (k->load_config) { 1549 ret = k->load_config(qbus->parent, f); 1550 if (ret) 1551 return ret; 1552 } 1553 1554 qemu_get_8s(f, &vdev->status); 1555 qemu_get_8s(f, &vdev->isr); 1556 qemu_get_be16s(f, &vdev->queue_sel); 1557 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) { 1558 return -1; 1559 } 1560 qemu_get_be32s(f, &features); 1561 1562 /* 1563 * Temporarily set guest_features low bits - needed by 1564 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 1565 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ. 1566 * 1567 * Note: devices should always test host features in future - don't create 1568 * new dependencies like this. 1569 */ 1570 vdev->guest_features = features; 1571 1572 config_len = qemu_get_be32(f); 1573 1574 /* 1575 * There are cases where the incoming config can be bigger or smaller 1576 * than what we have; so load what we have space for, and skip 1577 * any excess that's in the stream. 
1578 */ 1579 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len)); 1580 1581 while (config_len > vdev->config_len) { 1582 qemu_get_byte(f); 1583 config_len--; 1584 } 1585 1586 num = qemu_get_be32(f); 1587 1588 if (num > VIRTIO_QUEUE_MAX) { 1589 error_report("Invalid number of virtqueues: 0x%x", num); 1590 return -1; 1591 } 1592 1593 for (i = 0; i < num; i++) { 1594 vdev->vq[i].vring.num = qemu_get_be32(f); 1595 if (k->has_variable_vring_alignment) { 1596 vdev->vq[i].vring.align = qemu_get_be32(f); 1597 } 1598 vdev->vq[i].vring.desc = qemu_get_be64(f); 1599 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); 1600 vdev->vq[i].signalled_used_valid = false; 1601 vdev->vq[i].notification = true; 1602 1603 if (vdev->vq[i].vring.desc) { 1604 /* XXX virtio-1 devices */ 1605 virtio_queue_update_rings(vdev, i); 1606 } else if (vdev->vq[i].last_avail_idx) { 1607 error_report("VQ %d address 0x0 " 1608 "inconsistent with Host index 0x%x", 1609 i, vdev->vq[i].last_avail_idx); 1610 return -1; 1611 } 1612 if (k->load_queue) { 1613 ret = k->load_queue(qbus->parent, i, f); 1614 if (ret) 1615 return ret; 1616 } 1617 } 1618 1619 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); 1620 1621 if (vdc->load != NULL) { 1622 ret = vdc->load(vdev, f, version_id); 1623 if (ret) { 1624 return ret; 1625 } 1626 } 1627 1628 /* Subsections */ 1629 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); 1630 if (ret) { 1631 return ret; 1632 } 1633 1634 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) { 1635 vdev->device_endian = virtio_default_endian(); 1636 } 1637 1638 if (virtio_64bit_features_needed(vdev)) { 1639 /* 1640 * Subsection load filled vdev->guest_features. Run them 1641 * through virtio_set_features to sanity-check them against 1642 * host_features. 1643 */ 1644 uint64_t features64 = vdev->guest_features; 1645 if (virtio_set_features_nocheck(vdev, features64) < 0) { 1646 error_report("Features 0x%" PRIx64 " unsupported. " 1647 "Allowed features: 0x%" PRIx64, 1648 features64, vdev->host_features); 1649 return -1; 1650 } 1651 } else { 1652 if (virtio_set_features_nocheck(vdev, features) < 0) { 1653 error_report("Features 0x%x unsupported. " 1654 "Allowed features: 0x%" PRIx64, 1655 features, vdev->host_features); 1656 return -1; 1657 } 1658 } 1659 1660 for (i = 0; i < num; i++) { 1661 if (vdev->vq[i].vring.desc) { 1662 uint16_t nheads; 1663 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; 1664 /* Check it isn't doing strange things with descriptor numbers. */ 1665 if (nheads > vdev->vq[i].vring.num) { 1666 error_report("VQ %d size 0x%x Guest index 0x%x " 1667 "inconsistent with Host index 0x%x: delta 0x%x", 1668 i, vdev->vq[i].vring.num, 1669 vring_avail_idx(&vdev->vq[i]), 1670 vdev->vq[i].last_avail_idx, nheads); 1671 return -1; 1672 } 1673 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); 1674 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); 1675 1676 /* 1677 * Some devices migrate VirtQueueElements that have been popped 1678 * from the avail ring but not yet returned to the used ring. 
1679 */ 1680 vdev->vq[i].inuse = vdev->vq[i].last_avail_idx - 1681 vdev->vq[i].used_idx; 1682 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) { 1683 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - " 1684 "used_idx 0x%x", 1685 i, vdev->vq[i].vring.num, 1686 vdev->vq[i].last_avail_idx, 1687 vdev->vq[i].used_idx); 1688 return -1; 1689 } 1690 } 1691 } 1692 1693 return 0; 1694 } 1695 1696 void virtio_cleanup(VirtIODevice *vdev) 1697 { 1698 qemu_del_vm_change_state_handler(vdev->vmstate); 1699 g_free(vdev->config); 1700 g_free(vdev->vq); 1701 g_free(vdev->vector_queues); 1702 } 1703 1704 static void virtio_vmstate_change(void *opaque, int running, RunState state) 1705 { 1706 VirtIODevice *vdev = opaque; 1707 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1708 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1709 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); 1710 vdev->vm_running = running; 1711 1712 if (backend_run) { 1713 virtio_set_status(vdev, vdev->status); 1714 } 1715 1716 if (k->vmstate_change) { 1717 k->vmstate_change(qbus->parent, backend_run); 1718 } 1719 1720 if (!backend_run) { 1721 virtio_set_status(vdev, vdev->status); 1722 } 1723 } 1724 1725 void virtio_instance_init_common(Object *proxy_obj, void *data, 1726 size_t vdev_size, const char *vdev_name) 1727 { 1728 DeviceState *vdev = data; 1729 1730 object_initialize(vdev, vdev_size, vdev_name); 1731 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL); 1732 object_unref(OBJECT(vdev)); 1733 qdev_alias_all_properties(vdev, proxy_obj); 1734 } 1735 1736 void virtio_init(VirtIODevice *vdev, const char *name, 1737 uint16_t device_id, size_t config_size) 1738 { 1739 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1740 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1741 int i; 1742 int nvectors = k->query_nvectors ? 
k->query_nvectors(qbus->parent) : 0; 1743 1744 if (nvectors) { 1745 vdev->vector_queues = 1746 g_malloc0(sizeof(*vdev->vector_queues) * nvectors); 1747 } 1748 1749 vdev->device_id = device_id; 1750 vdev->status = 0; 1751 vdev->isr = 0; 1752 vdev->queue_sel = 0; 1753 vdev->config_vector = VIRTIO_NO_VECTOR; 1754 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); 1755 vdev->vm_running = runstate_is_running(); 1756 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1757 vdev->vq[i].vector = VIRTIO_NO_VECTOR; 1758 vdev->vq[i].vdev = vdev; 1759 vdev->vq[i].queue_index = i; 1760 } 1761 1762 vdev->name = name; 1763 vdev->config_len = config_size; 1764 if (vdev->config_len) { 1765 vdev->config = g_malloc0(config_size); 1766 } else { 1767 vdev->config = NULL; 1768 } 1769 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, 1770 vdev); 1771 vdev->device_endian = virtio_default_endian(); 1772 vdev->use_guest_notifier_mask = true; 1773 } 1774 1775 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) 1776 { 1777 return vdev->vq[n].vring.desc; 1778 } 1779 1780 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) 1781 { 1782 return vdev->vq[n].vring.avail; 1783 } 1784 1785 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) 1786 { 1787 return vdev->vq[n].vring.used; 1788 } 1789 1790 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) 1791 { 1792 return vdev->vq[n].vring.desc; 1793 } 1794 1795 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) 1796 { 1797 return sizeof(VRingDesc) * vdev->vq[n].vring.num; 1798 } 1799 1800 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) 1801 { 1802 return offsetof(VRingAvail, ring) + 1803 sizeof(uint16_t) * vdev->vq[n].vring.num; 1804 } 1805 1806 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) 1807 { 1808 return offsetof(VRingUsed, ring) + 1809 sizeof(VRingUsedElem) * vdev->vq[n].vring.num; 1810 } 1811 1812 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n) 1813 { 1814 return vdev->vq[n].vring.used - vdev->vq[n].vring.desc + 1815 virtio_queue_get_used_size(vdev, n); 1816 } 1817 1818 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) 1819 { 1820 return vdev->vq[n].last_avail_idx; 1821 } 1822 1823 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) 1824 { 1825 vdev->vq[n].last_avail_idx = idx; 1826 vdev->vq[n].shadow_avail_idx = idx; 1827 } 1828 1829 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) 1830 { 1831 vdev->vq[n].signalled_used_valid = false; 1832 } 1833 1834 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) 1835 { 1836 return vdev->vq + n; 1837 } 1838 1839 uint16_t virtio_get_queue_index(VirtQueue *vq) 1840 { 1841 return vq->queue_index; 1842 } 1843 1844 static void virtio_queue_guest_notifier_read(EventNotifier *n) 1845 { 1846 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); 1847 if (event_notifier_test_and_clear(n)) { 1848 virtio_irq(vq); 1849 } 1850 } 1851 1852 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, 1853 bool with_irqfd) 1854 { 1855 if (assign && !with_irqfd) { 1856 event_notifier_set_handler(&vq->guest_notifier, false, 1857 virtio_queue_guest_notifier_read); 1858 } else { 1859 event_notifier_set_handler(&vq->guest_notifier, false, NULL); 1860 } 1861 if (!assign) { 1862 /* Test and clear notifier before closing it, 1863 * in case poll callback didn't have time to run. 
*/ 1864 virtio_queue_guest_notifier_read(&vq->guest_notifier); 1865 } 1866 } 1867 1868 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) 1869 { 1870 return &vq->guest_notifier; 1871 } 1872 1873 static void virtio_queue_host_notifier_aio_read(EventNotifier *n) 1874 { 1875 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 1876 if (event_notifier_test_and_clear(n)) { 1877 virtio_queue_notify_aio_vq(vq); 1878 } 1879 } 1880 1881 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, 1882 VirtIOHandleOutput handle_output) 1883 { 1884 if (handle_output) { 1885 vq->handle_aio_output = handle_output; 1886 aio_set_event_notifier(ctx, &vq->host_notifier, true, 1887 virtio_queue_host_notifier_aio_read); 1888 } else { 1889 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); 1890 /* Test and clear notifier after disabling event, 1891 * in case poll callback didn't have time to run. */ 1892 virtio_queue_host_notifier_aio_read(&vq->host_notifier); 1893 vq->handle_aio_output = NULL; 1894 } 1895 } 1896 1897 static void virtio_queue_host_notifier_read(EventNotifier *n) 1898 { 1899 VirtQueue *vq = container_of(n, VirtQueue, host_notifier); 1900 if (event_notifier_test_and_clear(n)) { 1901 virtio_queue_notify_vq(vq); 1902 } 1903 } 1904 1905 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, 1906 bool set_handler) 1907 { 1908 AioContext *ctx = qemu_get_aio_context(); 1909 if (assign && set_handler) { 1910 if (vq->use_aio) { 1911 aio_set_event_notifier(ctx, &vq->host_notifier, true, 1912 virtio_queue_host_notifier_read); 1913 } else { 1914 event_notifier_set_handler(&vq->host_notifier, true, 1915 virtio_queue_host_notifier_read); 1916 } 1917 } else { 1918 if (vq->use_aio) { 1919 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); 1920 } else { 1921 event_notifier_set_handler(&vq->host_notifier, true, NULL); 1922 } 1923 } 1924 if (!assign) { 1925 /* Test and clear notifier after disabling event, 1926 * in case poll callback didn't have time to run. 
*/ 1927 virtio_queue_host_notifier_read(&vq->host_notifier); 1928 } 1929 } 1930 1931 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) 1932 { 1933 return &vq->host_notifier; 1934 } 1935 1936 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) 1937 { 1938 g_free(vdev->bus_name); 1939 vdev->bus_name = g_strdup(bus_name); 1940 } 1941 1942 static void virtio_device_realize(DeviceState *dev, Error **errp) 1943 { 1944 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1945 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 1946 Error *err = NULL; 1947 1948 if (vdc->realize != NULL) { 1949 vdc->realize(dev, &err); 1950 if (err != NULL) { 1951 error_propagate(errp, err); 1952 return; 1953 } 1954 } 1955 1956 virtio_bus_device_plugged(vdev, &err); 1957 if (err != NULL) { 1958 error_propagate(errp, err); 1959 return; 1960 } 1961 } 1962 1963 static void virtio_device_unrealize(DeviceState *dev, Error **errp) 1964 { 1965 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1966 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 1967 Error *err = NULL; 1968 1969 virtio_bus_device_unplugged(vdev); 1970 1971 if (vdc->unrealize != NULL) { 1972 vdc->unrealize(dev, &err); 1973 if (err != NULL) { 1974 error_propagate(errp, err); 1975 return; 1976 } 1977 } 1978 1979 g_free(vdev->bus_name); 1980 vdev->bus_name = NULL; 1981 } 1982 1983 static Property virtio_properties[] = { 1984 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), 1985 DEFINE_PROP_END_OF_LIST(), 1986 }; 1987 1988 static void virtio_device_class_init(ObjectClass *klass, void *data) 1989 { 1990 /* Set the default value here. */ 1991 DeviceClass *dc = DEVICE_CLASS(klass); 1992 1993 dc->realize = virtio_device_realize; 1994 dc->unrealize = virtio_device_unrealize; 1995 dc->bus_type = TYPE_VIRTIO_BUS; 1996 dc->props = virtio_properties; 1997 } 1998 1999 static const TypeInfo virtio_device_info = { 2000 .name = TYPE_VIRTIO_DEVICE, 2001 .parent = TYPE_DEVICE, 2002 .instance_size = sizeof(VirtIODevice), 2003 .class_init = virtio_device_class_init, 2004 .abstract = true, 2005 .class_size = sizeof(VirtioDeviceClass), 2006 }; 2007 2008 static void virtio_register_types(void) 2009 { 2010 type_register_static(&virtio_device_info); 2011 } 2012 2013 type_init(virtio_register_types) 2014