/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 vq->vring.align);
}

static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
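
/*
 * Layout note (illustrative, legacy/0.9.x ring format): virtqueue_init()
 * places the three vring parts contiguously in guest memory at vq->pa:
 *
 *   desc table : num * sizeof(VRingDesc)             = num * 16 bytes
 *   avail ring : 2 + 2 + num * 2 (+ 2 for used_event with EVENT_IDX)
 *   used ring  : aligned up to vring.align, then 2 + 2 + num * 8
 *                (+ 2 for avail_event with EVENT_IDX)
 *
 * For example, with num = 8 and the default 4096-byte alignment the desc
 * table occupies 128 bytes, the avail ring starts at pa + 128, and the
 * used ring starts at pa + 4096.  When VIRTIO_RING_F_EVENT_IDX is
 * negotiated, vring_used_event() above reads used_event from the slot
 * just past avail->ring[num - 1], and vring_avail_event() below writes
 * avail_event into the slot just past used->ring[num - 1].
 */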

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Write the used element for this buffer into the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}
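
/*
 * Usage sketch (illustrative only, not part of the device API contract):
 * a device can complete several buffers and then publish them with a
 * single used-index update and a single guest notification:
 *
 *     virtqueue_fill(vq, &elem0, len0, 0);
 *     virtqueue_fill(vq, &elem1, len1, 1);
 *     virtqueue_flush(vq, 2);
 *     virtio_notify(vdev, vq);
 *
 * virtqueue_push() below is the common single-buffer shortcut for
 * fill(..., 0) followed by flush(vq, 1).
 */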

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
                      size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}
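
/*
 * Typical request cycle (sketch, not a definitive pattern): a device's
 * handle_output callback drains the queue with virtqueue_pop(), reads the
 * guest's out_sg buffers, writes results into in_sg, and then returns the
 * buffer with virtqueue_push() plus virtio_notify():
 *
 *     VirtQueueElement elem;
 *     while (virtqueue_pop(vq, &elem)) {
 *         ... consume elem.out_sg[0 .. out_num-1] ...
 *         ... produce into elem.in_sg[0 .. in_num-1], 'written' bytes total ...
 *         virtqueue_push(vq, &elem, written);
 *         virtio_notify(vdev, vq);
 *     }
 */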

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
    virtqueue_init(&vdev->vq[n]);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtqueue_init(&vdev->vq[n]);
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}
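
/*
 * Worked example (illustrative): suppose the used index moved from
 * old = 10 to new = 12 and the guest last wrote used_event = 10.
 * Then (uint16_t)(12 - 10 - 1) = 1 < (uint16_t)(12 - 10) = 2, so we
 * notify: the guest asked to be told once the entry at index 10 had been
 * used.  If used_event were 12 instead, 0xffff < 2 is false and the
 * notification is suppressed.  The unsigned 16-bit arithmetic makes the
 * comparison work across index wrap-around.
 */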

static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if the feature was acknowledged) */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }
}

int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t supported_features = vbusk->get_features(qbus->parent);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = k->get_features(qbus->parent);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    int i;
    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    /* Avail ring entries are 16 bits wide. */
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}
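
/*
 * Note (descriptive, not normative): the host notifier is signalled when
 * the guest kicks the queue (for example via a transport ioeventfd) and is
 * drained by virtio_queue_host_notifier_read() above, which re-dispatches
 * to the queue's handle_output callback.  The guest notifier runs in the
 * opposite direction: the device signals it to raise an interrupt, either
 * through virtio_queue_guest_notifier_read() -> virtio_irq(), or directly
 * via an irqfd when with_irqfd is set.
 */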

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }

    if (bus_name) {
        vdev->bus_name = g_strdup(bus_name);
    }
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }
    virtio_bus_device_plugged(vdev);
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)