/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"

#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;
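/*
 * Run-time state of a single virtqueue: the guest-visible ring layout
 * plus the indices QEMU caches on the host side (last_avail_idx,
 * shadow_avail_idx, used_idx, ...) so it does not have to re-read guest
 * memory on every access.
 */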
struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Last used index value we have signalled on */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                        (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}
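/*
 * Enable or disable guest->host notifications for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX the device publishes the avail index it wants
 * to be notified at; otherwise it sets or clears VRING_USED_F_NO_NOTIFY
 * in the used ring flags.
 */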
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
}

void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                       unsigned int len)
{
    vq->last_avail_idx--;
    virtqueue_unmap_sg(vq, elem, len);
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}
static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                         hwaddr desc_pa, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    vring_desc_read(vdev, desc, desc_pa, next);
    return next;
}
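/*
 * Walk the descriptor chains the guest has made available without
 * popping them, adding up how many bytes the device could read
 * (out_bytes) and write (in_bytes).  Indirect tables are followed, and
 * the walk stops early once both caller-supplied limits are reached.
 */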
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            error_report("virtio: too many write descriptors in indirect table");
            exit(1);
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    *p_num_sg = num_sg;
}
static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, 0);
}

void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
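/*
 * Pop the next available descriptor chain: follow the chain (including
 * indirect tables), map every descriptor into host memory and return a
 * freshly allocated VirtQueueElement describing the resulting in/out
 * scatter-gather lists, or NULL if the queue is empty.
 */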
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;

    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        if (desc.flags & VRING_DESC_F_WRITE) {
            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
        } else {
            if (in_num) {
                error_report("Incorrect order for descriptors");
                exit(1);
            }
            virtqueue_map_desc(&out_num, addr, iov,
                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;
}

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout. */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above. */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}
/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    }
}
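/*
 * Config space accessors.  The legacy virtio_config_read/write helpers
 * below access the config buffer with the plain ld/st helpers, while the
 * virtio_config_modern_ variants further down use the explicitly
 * little-endian accessors (lduw_le_p, stl_le_p, ...) expected for
 * VIRTIO 1.0 transports.
 */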
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}
void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}
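/*
 * Migration subsection predicates: each *_needed helper below tells the
 * migration code whether the optional subsection it belongs to has to be
 * included in the outgoing stream.
 */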
static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name = "extra_state",
            .version_id = 0,
            .field_exists = NULL,
            .size = 0,
            .info = &vmstate_info_extra_state,
            .flags = VMS_SINGLE,
            .offset = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_extra_state,
        NULL
    }
};

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
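/*
 * Feature negotiation helpers: the guest-requested bits are masked
 * against vdev->host_features before being stored, and -1 is returned
 * if the guest asked for a feature the device does not offer.
 */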
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
        }
    }

    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev->vector_queues);
}
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize(vdev, vdev_size, vdev_name);
    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
    object_unref(OBJECT(vdev));
    qdev_alias_all_properties(vdev, proxy_obj);
}
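/*
 * Common initialisation shared by all virtio devices: allocate the
 * VirtQueue array (and per-vector queue lists when the transport
 * reports vectors), set up the config space buffer and register the
 * VM state change handler.
 */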
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
           virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                bool assign, bool set_handler)
{
    if (assign && set_handler) {
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
                               virtio_queue_host_notifier_read);
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)