/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * This is the x86 page size.  It is the default, used by transports like
 * PCI which don't provide a means for the guest to tell the host the
 * alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    /* Last used index written back to the used ring. */
    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether the signalled_used value above is valid. */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
    bool use_aio;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};
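
/*
 * Illustrative note (not used by the code below): with the legacy split
 * ring layout computed by virtio_queue_update_rings(), the three vring
 * areas follow one another in guest memory.  Assuming a hypothetical queue
 * with num = 256 and the default 4096-byte alignment:
 *
 *   desc  starts at base + 0      (256 * sizeof(VRingDesc) = 4096 bytes)
 *   avail starts at base + 4096   (4 + 2 * 256             =  516 bytes)
 *   used  starts at ROUND_UP(base + 4612, 4096) = base + 8192
 *
 * The numbers are only a worked example of the arithmetic performed below;
 * the actual values depend on the queue size and the transport's alignment.
 */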

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                        (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
}

void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                       unsigned int len)
{
    vq->last_avail_idx--;
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                         hwaddr desc_pa, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    vring_desc_read(vdev, desc, desc_pa, next);
    return next;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        error_report("virtio: zero sized buffers are not allowed");
        exit(1);
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            error_report("virtio: too many write descriptors in indirect table");
            exit(1);
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    *p_num_sg = num_sg;
}

static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
499 */ 500 #ifdef NDEBUG 501 #error building with NDEBUG is not supported 502 #endif 503 assert(*num_sg <= max_size); 504 505 for (i = 0; i < *num_sg; i++) { 506 len = sg[i].iov_len; 507 sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); 508 if (!sg[i].iov_base) { 509 error_report("virtio: error trying to map MMIO memory"); 510 exit(1); 511 } 512 if (len != sg[i].iov_len) { 513 error_report("virtio: unexpected memory split"); 514 exit(1); 515 } 516 } 517 } 518 519 void virtqueue_map(VirtQueueElement *elem) 520 { 521 virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, 522 VIRTQUEUE_MAX_SIZE, 1); 523 virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, 524 VIRTQUEUE_MAX_SIZE, 0); 525 } 526 527 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) 528 { 529 VirtQueueElement *elem; 530 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); 531 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); 532 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); 533 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); 534 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); 535 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); 536 537 assert(sz >= sizeof(VirtQueueElement)); 538 elem = g_malloc(out_sg_end); 539 elem->out_num = out_num; 540 elem->in_num = in_num; 541 elem->in_addr = (void *)elem + in_addr_ofs; 542 elem->out_addr = (void *)elem + out_addr_ofs; 543 elem->in_sg = (void *)elem + in_sg_ofs; 544 elem->out_sg = (void *)elem + out_sg_ofs; 545 return elem; 546 } 547 548 void *virtqueue_pop(VirtQueue *vq, size_t sz) 549 { 550 unsigned int i, head, max; 551 hwaddr desc_pa = vq->vring.desc; 552 VirtIODevice *vdev = vq->vdev; 553 VirtQueueElement *elem; 554 unsigned out_num, in_num; 555 hwaddr addr[VIRTQUEUE_MAX_SIZE]; 556 struct iovec iov[VIRTQUEUE_MAX_SIZE]; 557 VRingDesc desc; 558 559 if (virtio_queue_empty(vq)) { 560 return NULL; 561 } 562 /* Needed after virtio_queue_empty(), see comment in 563 * virtqueue_num_heads(). */ 564 smp_rmb(); 565 566 /* When we start there are none of either input nor output. */ 567 out_num = in_num = 0; 568 569 max = vq->vring.num; 570 571 if (vq->inuse >= vq->vring.num) { 572 error_report("Virtqueue size exceeded"); 573 exit(1); 574 } 575 576 i = head = virtqueue_get_head(vq, vq->last_avail_idx++); 577 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { 578 vring_set_avail_event(vq, vq->last_avail_idx); 579 } 580 581 vring_desc_read(vdev, &desc, desc_pa, i); 582 if (desc.flags & VRING_DESC_F_INDIRECT) { 583 if (desc.len % sizeof(VRingDesc)) { 584 error_report("Invalid size for indirect buffer table"); 585 exit(1); 586 } 587 588 /* loop over the indirect descriptor table */ 589 max = desc.len / sizeof(VRingDesc); 590 desc_pa = desc.addr; 591 i = 0; 592 vring_desc_read(vdev, &desc, desc_pa, i); 593 } 594 595 /* Collect all the descriptors */ 596 do { 597 if (desc.flags & VRING_DESC_F_WRITE) { 598 virtqueue_map_desc(&in_num, addr + out_num, iov + out_num, 599 VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len); 600 } else { 601 if (in_num) { 602 error_report("Incorrect order for descriptors"); 603 exit(1); 604 } 605 virtqueue_map_desc(&out_num, addr, iov, 606 VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len); 607 } 608 609 /* If we've got too many, that implies a descriptor loop. 

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_aio_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_aio_output(vdev, vq);
    }
}

static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
                                            VirtIOHandleOutput handle_output,
                                            bool use_aio)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;
    vdev->vq[i].use_aio = use_aio;

    return &vdev->vq[i];
}

/* Add a virt queue and mark AIO.
 * An AIO queue will use the AioContext based event interface instead of the
 * default IOHandler and EventNotifier interface.
 */
VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
                                VirtIOHandleOutput handle_output)
{
    return virtio_add_queue_internal(vdev, queue_size, handle_output, true);
}

/* Add a normal virt queue (as opposed to the AIO version above). */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    return virtio_add_queue_internal(vdev, queue_size, handle_output, false);
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if the feature was acknowledged) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
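
/*
 * Worked example (illustrative): with VIRTIO_RING_F_EVENT_IDX negotiated,
 * vring_need_event() only requests an interrupt when the guest's used_event
 * index falls inside the window of entries published since the last
 * notification, i.e. (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old).
 * If old = 10, new = 13 and the guest set used_event = 11, the device
 * notifies; if used_event = 20, it stays silent until the used index
 * catches up.  All arithmetic is modulo 2^16.
 */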

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
           k->has_extra_state(qbus->parent);
}

static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name = "extra_state",
            .version_id = 0,
            .field_exists = NULL,
            .size = 0,
            .info = &vmstate_info_extra_state,
            .flags = VMS_SINGLE,
            .offset = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_extra_state,
        NULL
    }
};

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

/* A wrapper for use as a VMState .put function */
void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size)
{
    virtio_save(VIRTIO_DEVICE(opaque), f);
}

static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             */
1656 */ 1657 vdev->vq[i].inuse = vdev->vq[i].last_avail_idx - 1658 vdev->vq[i].used_idx; 1659 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) { 1660 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - " 1661 "used_idx 0x%x", 1662 i, vdev->vq[i].vring.num, 1663 vdev->vq[i].last_avail_idx, 1664 vdev->vq[i].used_idx); 1665 return -1; 1666 } 1667 } 1668 } 1669 1670 return 0; 1671 } 1672 1673 void virtio_cleanup(VirtIODevice *vdev) 1674 { 1675 qemu_del_vm_change_state_handler(vdev->vmstate); 1676 g_free(vdev->config); 1677 g_free(vdev->vq); 1678 g_free(vdev->vector_queues); 1679 } 1680 1681 static void virtio_vmstate_change(void *opaque, int running, RunState state) 1682 { 1683 VirtIODevice *vdev = opaque; 1684 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1685 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1686 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); 1687 vdev->vm_running = running; 1688 1689 if (backend_run) { 1690 virtio_set_status(vdev, vdev->status); 1691 } 1692 1693 if (k->vmstate_change) { 1694 k->vmstate_change(qbus->parent, backend_run); 1695 } 1696 1697 if (!backend_run) { 1698 virtio_set_status(vdev, vdev->status); 1699 } 1700 } 1701 1702 void virtio_instance_init_common(Object *proxy_obj, void *data, 1703 size_t vdev_size, const char *vdev_name) 1704 { 1705 DeviceState *vdev = data; 1706 1707 object_initialize(vdev, vdev_size, vdev_name); 1708 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL); 1709 object_unref(OBJECT(vdev)); 1710 qdev_alias_all_properties(vdev, proxy_obj); 1711 } 1712 1713 void virtio_init(VirtIODevice *vdev, const char *name, 1714 uint16_t device_id, size_t config_size) 1715 { 1716 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); 1717 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); 1718 int i; 1719 int nvectors = k->query_nvectors ? 

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
           virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier, false,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_aio_vq(vq);
    }
}

void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                VirtIOHandleOutput handle_output)
{
    if (handle_output) {
        vq->handle_aio_output = handle_output;
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
                               virtio_queue_host_notifier_aio_read);
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
        vq->handle_aio_output = NULL;
    }
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    AioContext *ctx = qemu_get_aio_context();
    if (assign && set_handler) {
        if (vq->use_aio) {
            aio_set_event_notifier(ctx, &vq->host_notifier, true,
                                   virtio_queue_host_notifier_read);
        } else {
            event_notifier_set_handler(&vq->host_notifier, true,
                                       virtio_queue_host_notifier_read);
        }
    } else {
        if (vq->use_aio) {
            aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
        } else {
            event_notifier_set_handler(&vq->host_notifier, true, NULL);
        }
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)
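
/*
 * Minimal wiring sketch (illustrative, not built here): a concrete device
 * built on this core typically calls virtio_init() from its realize hook,
 * registers its queues with virtio_add_queue(), and tears everything down
 * with virtio_del_queue()/virtio_cleanup() on unrealize.  The device name,
 * device ID, config struct and queue size below are placeholders.
 *
 *     static void my_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *
 *         virtio_init(vdev, "my-device", MY_DEVICE_ID,
 *                     sizeof(struct my_device_config));
 *         virtio_add_queue(vdev, 128, my_device_handle_output);
 *     }
 *
 *     static void my_device_unrealize(DeviceState *dev, Error **errp)
 *     {
 *         virtio_del_queue(VIRTIO_DEVICE(dev), 0);
 *         virtio_cleanup(VIRTIO_DEVICE(dev));
 *     }
 */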