/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Is signalled_used valid? */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 vq->vring.align);
}

static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
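
/* When VIRTIO_RING_F_EVENT_IDX is negotiated, the entry just past the end of
 * the avail ring (index vq->vring.num) holds the guest's "used_event" index,
 * read by vring_used_event() above, and the entry just past the end of the
 * used ring holds the host's "avail_event" index, written by
 * vring_avail_event() below. */
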
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Write this element into its entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}
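
/* virtqueue_fill() above only stages a completed element in the used ring at
 * an offset past the current used index; nothing becomes visible to the guest
 * until virtqueue_flush() below publishes it by advancing used->idx after a
 * write barrier.  The split lets a device fill several elements and then
 * flush them as one batch. */
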
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}
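
/* A descriptor with VRING_DESC_F_INDIRECT points at a guest-memory table that
 * is itself an array of VRingDesc entries; the walkers below switch desc_pa
 * and max over to that table when they meet such a descriptor, so one head
 * can describe a long chain without consuming extra ring slots. */
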
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
                      size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}
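
/* Typical device-side use of the pop/push API below (an illustrative sketch,
 * not code from this file; handle_req() is a made-up helper that reads the
 * guest's out_sg buffers and fills the in_sg buffers):
 *
 *     VirtQueueElement elem;
 *     while (virtqueue_pop(vq, &elem)) {
 *         unsigned int written = handle_req(&elem);
 *         virtqueue_push(vq, &elem, written);
 *     }
 *     virtio_notify(vdev, vq);
 */
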
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}
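
/* Config space accessors: each read asks the device to refresh the cached
 * vdev->config buffer via get_config() and then loads from the cache; each
 * write stores into the cache and, if the device provides one, calls its
 * set_config() hook.  Accesses beyond config_len are ignored (reads return
 * all-ones). */
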
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    if (num <= VIRTQUEUE_MAX_SIZE) {
        vdev->vq[n].vring.num = num;
        virtqueue_init(&vdev->vq[n]);
    }
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtqueue_init(&vdev->vq[n]);
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}
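
/* Worked example of the event-index test implemented below: with old = 5,
 * new = 7 and a guest-written used_event of 6, (uint16_t)(new - event - 1)
 * is 0 and (uint16_t)(new - old) is 2, so 0 < 2 and we notify (the event
 * index was crossed).  With used_event = 3 the left side is 3, which is not
 * below 2, so the interrupt is suppressed.  The unsigned 16-bit arithmetic
 * keeps the comparison correct across index wrap-around. */
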
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}

static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was
     * negotiated). */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }
}

int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t supported_features = vbusk->get_features(qbus->parent);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}
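
/* virtio_load() below must consume fields in exactly the order virtio_save()
 * above wrote them; any change to one has to be mirrored in the other and
 * coordinated with the transport's load_config/load_queue hooks. */
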
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = k->get_features(qbus->parent);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}
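
/* virtio_init() performs the common device setup: it allocates the fixed
 * array of VIRTIO_PCI_QUEUE_MAX virtqueues, clears status/ISR/queue_sel,
 * allocates the config space cache when config_size is non-zero, and
 * registers the VM state change handler above so the device tracks
 * vm_running transitions. */
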
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    int i;
    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    /* The avail ring entries are 16-bit (see VRingAvail). */
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
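
/* Two event notifiers hang off each VirtQueue: the guest notifier above
 * carries device-to-guest interrupts (and can be bound to an irqfd by the
 * transport), while the host notifier below carries guest-to-device kicks
 * (and can be bound to an ioeventfd); the read handlers here are the
 * userspace fallback when no in-kernel path is wired up. */
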
static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }

    if (bus_name) {
        vdev->bus_name = g_strdup(bus_name);
    }
}

static int virtio_device_init(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
    assert(k->init != NULL);
    if (k->init(vdev) < 0) {
        return -1;
    }
    virtio_bus_plug_device(vdev);
    return 0;
}

static int virtio_device_exit(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);

    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }
    return 0;
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);
    dc->init = virtio_device_init;
    dc->exit = virtio_device_exit;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)