/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 vq->vring.align);
}

static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return virtio_ldq_phys(vdev, pa);
}

static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return virtio_ldl_phys(vdev, pa);
}

static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
                                        int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return virtio_lduw_phys(vdev, pa);
}

static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return virtio_lduw_phys(vdev, pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}
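
/*
 * Note: when VIRTIO_RING_F_EVENT_IDX is negotiated, the guest publishes its
 * used_event index in the spare slot after the last avail ring entry
 * (avail->ring[num]), and the device publishes avail_event after the last
 * used ring entry (used->ring[num]).  vring_get_used_event() below and
 * vring_set_avail_event() rely on exactly this layout.
 */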
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    virtio_stl_phys(vq->vdev, pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    virtio_stl_phys(vq->vdev, pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
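    /*
     * Note: this only stores the used element; the new used index is not
     * made visible to the guest until virtqueue_flush() bumps used->idx
     * after a write barrier, so the guest cannot observe the entry before
     * its contents are complete.
     */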
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(vdev, desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(vdev, desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(vdev, desc_pa, i);
            } else {
                out_total += vring_desc_len(vdev, desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
                      size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    if (num_sg > VIRTQUEUE_MAX_SIZE) {
        error_report("virtio: map attempt out of bounds: %zd > %d",
                     num_sg, VIRTQUEUE_MAX_SIZE);
        exit(1);
    }

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
    }
}
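
/*
 * Pop the next available request off the queue: read its head index from the
 * avail ring, walk the (possibly indirect) descriptor chain and map every
 * descriptor into elem->in_sg (device-writable) or elem->out_sg
 * (device-readable).  Returns the total number of descriptors in the element,
 * or 0 if no buffer is currently available.
 */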
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are no input or output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(vdev, desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(vdev, desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
    virtqueue_init(&vdev->vq[n]);
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtqueue_init(&vdev->vq[n]);
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_PCI_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}
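
/*
 * Decide whether the guest needs to be interrupted about new used buffers.
 * Without VIRTIO_RING_F_EVENT_IDX this is simply gated on
 * VRING_AVAIL_F_NO_INTERRUPT.  With EVENT_IDX negotiated, the notification is
 * suppressed unless the used index has moved past the guest's used_event
 * since we last signalled, i.e. roughly:
 *
 *   (uint16_t)(new - used_event - 1) < (uint16_t)(new - old)
 *
 * which is what the vring_need_event() helper used below computes.
 */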
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if the feature was acknowledged) */
    if (virtio_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
        return true;
    }

    if (!virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    return vdev->device_endian != virtio_default_endian();
}

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_virtio_device_endian,
            .needed = &virtio_device_endian_needed
        },
        { 0 }
    }
};

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
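
/*
 * Accept the guest's feature selection.  Bits the transport/backend does not
 * offer are masked off before being handed to the device; the return value is
 * -1 if the guest (or an incoming migration stream, see virtio_load()) tried
 * to set a feature bit we do not support.
 */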
int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t supported_features = vbusk->get_features(qbus->parent);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    uint32_t supported_features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = k->get_features(qbus->parent);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_PCI_QUEUE_MAX) {
        error_report("Invalid number of PCI queues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            virtqueue_init(&vdev->vq[i]);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].pa) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        }
    }

    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev->vector_queues);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize(vdev, vdev_size, vdev_name);
    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
    object_unref(OBJECT(vdev));
    qdev_alias_all_properties(vdev, proxy_obj);
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint64_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }
    virtio_bus_device_plugged(vdev);
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)