xref: /openbmc/qemu/hw/virtio/virtio.c (revision 0fbb5d2d)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
32 /*
33  * The alignment to use between consumer and producer parts of vring.
34  * This default, the x86 page size, is used by transports like PCI
35  * which don't provide a means for the guest to tell the host the alignment.
36  */
37 #define VIRTIO_PCI_VRING_ALIGN         4096
38 
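/*
 * Guest-visible ring layouts. VRingDesc/VRingAvail/VRingUsed mirror the
 * split virtqueue layout from the VIRTIO spec and VRingPackedDesc the
 * packed layout; fields are byte-swapped on access as needed.
 */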
39 typedef struct VRingDesc
40 {
41     uint64_t addr;
42     uint32_t len;
43     uint16_t flags;
44     uint16_t next;
45 } VRingDesc;
46 
47 typedef struct VRingPackedDesc {
48     uint64_t addr;
49     uint32_t len;
50     uint16_t id;
51     uint16_t flags;
52 } VRingPackedDesc;
53 
54 typedef struct VRingAvail
55 {
56     uint16_t flags;
57     uint16_t idx;
58     uint16_t ring[];
59 } VRingAvail;
60 
61 typedef struct VRingUsedElem
62 {
63     uint32_t id;
64     uint32_t len;
65 } VRingUsedElem;
66 
67 typedef struct VRingUsed
68 {
69     uint16_t flags;
70     uint16_t idx;
71     VRingUsedElem ring[];
72 } VRingUsed;
73 
74 typedef struct VRingMemoryRegionCaches {
75     struct rcu_head rcu;
76     MemoryRegionCache desc;
77     MemoryRegionCache avail;
78     MemoryRegionCache used;
79 } VRingMemoryRegionCaches;
80 
81 typedef struct VRing
82 {
83     unsigned int num;
84     unsigned int num_default;
85     unsigned int align;
86     hwaddr desc;
87     hwaddr avail;
88     hwaddr used;
89     VRingMemoryRegionCaches *caches;
90 } VRing;
91 
92 typedef struct VRingPackedDescEvent {
93     uint16_t off_wrap;
94     uint16_t flags;
95 } VRingPackedDescEvent;
96 
97 struct VirtQueue
98 {
99     VRing vring;
100     VirtQueueElement *used_elems;
101 
102     /* Next head to pop */
103     uint16_t last_avail_idx;
104     bool last_avail_wrap_counter;
105 
106     /* Last avail_idx read from VQ. */
107     uint16_t shadow_avail_idx;
108     bool shadow_avail_wrap_counter;
109 
110     uint16_t used_idx;
111     bool used_wrap_counter;
112 
113     /* Last used index value we have signalled on */
114     uint16_t signalled_used;
115 
116     /* Whether signalled_used holds a valid value */
117     bool signalled_used_valid;
118 
119     /* Notification enabled? */
120     bool notification;
121 
122     uint16_t queue_index;
123 
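    /* Number of buffers popped from the avail ring and not yet returned to the guest */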
124     unsigned int inuse;
125 
126     uint16_t vector;
127     VirtIOHandleOutput handle_output;
128     VirtIOHandleAIOOutput handle_aio_output;
129     VirtIODevice *vdev;
130     EventNotifier guest_notifier;
131     EventNotifier host_notifier;
132     bool host_notifier_enabled;
133     QLIST_ENTRY(VirtQueue) node;
134 };
135 
136 /* Called within call_rcu().  */
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139     assert(caches != NULL);
140     address_space_cache_destroy(&caches->desc);
141     address_space_cache_destroy(&caches->avail);
142     address_space_cache_destroy(&caches->used);
143     g_free(caches);
144 }
145 
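/*
 * Drop the cached ring mappings for this virtqueue; the old caches are
 * freed only after an RCU grace period, so concurrent readers stay safe.
 */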
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148     VRingMemoryRegionCaches *caches;
149 
150     caches = qatomic_read(&vq->vring.caches);
151     qatomic_rcu_set(&vq->vring.caches, NULL);
152     if (caches) {
153         call_rcu(caches, virtio_free_region_cache, rcu);
154     }
155 }
156 
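/* (Re)build the MemoryRegionCaches for queue n's desc, used and avail rings. */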
157 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
158 {
159     VirtQueue *vq = &vdev->vq[n];
160     VRingMemoryRegionCaches *old = vq->vring.caches;
161     VRingMemoryRegionCaches *new = NULL;
162     hwaddr addr, size;
163     int64_t len;
164     bool packed;
165 
166 
167     addr = vq->vring.desc;
168     if (!addr) {
169         goto out_no_cache;
170     }
171     new = g_new0(VRingMemoryRegionCaches, 1);
172     size = virtio_queue_get_desc_size(vdev, n);
173     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
174                                    true : false;
175     len = address_space_cache_init(&new->desc, vdev->dma_as,
176                                    addr, size, packed);
177     if (len < size) {
178         virtio_error(vdev, "Cannot map desc");
179         goto err_desc;
180     }
181 
182     size = virtio_queue_get_used_size(vdev, n);
183     len = address_space_cache_init(&new->used, vdev->dma_as,
184                                    vq->vring.used, size, true);
185     if (len < size) {
186         virtio_error(vdev, "Cannot map used");
187         goto err_used;
188     }
189 
190     size = virtio_queue_get_avail_size(vdev, n);
191     len = address_space_cache_init(&new->avail, vdev->dma_as,
192                                    vq->vring.avail, size, false);
193     if (len < size) {
194         virtio_error(vdev, "Cannot map avail");
195         goto err_avail;
196     }
197 
198     qatomic_rcu_set(&vq->vring.caches, new);
199     if (old) {
200         call_rcu(old, virtio_free_region_cache, rcu);
201     }
202     return;
203 
204 err_avail:
205     address_space_cache_destroy(&new->avail);
206 err_used:
207     address_space_cache_destroy(&new->used);
208 err_desc:
209     address_space_cache_destroy(&new->desc);
210 out_no_cache:
211     g_free(new);
212     virtio_virtqueue_reset_region_cache(vq);
213 }
214 
215 /* virt queue functions */
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218     VRing *vring = &vdev->vq[n].vring;
219 
220     if (!vring->num || !vring->desc || !vring->align) {
221         /* not yet set up -> nothing to do */
222         return;
223     }
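    /*
     * Legacy (split) layout: the avail ring follows the descriptor table
     * directly and the used ring starts at the next aligned boundary.
     */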
224     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225     vring->used = vring_align(vring->avail +
226                               offsetof(VRingAvail, ring[vring->num]),
227                               vring->align);
228     virtio_init_region_cache(vdev, n);
229 }
230 
231 /* Called within rcu_read_lock().  */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233                                   MemoryRegionCache *cache, int i)
234 {
235     address_space_read_cached(cache, i * sizeof(VRingDesc),
236                               desc, sizeof(VRingDesc));
237     virtio_tswap64s(vdev, &desc->addr);
238     virtio_tswap32s(vdev, &desc->len);
239     virtio_tswap16s(vdev, &desc->flags);
240     virtio_tswap16s(vdev, &desc->next);
241 }
242 
243 static void vring_packed_event_read(VirtIODevice *vdev,
244                                     MemoryRegionCache *cache,
245                                     VRingPackedDescEvent *e)
246 {
247     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
248     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
249 
250     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
251     /* Make sure flags is seen before off_wrap */
252     smp_rmb();
253     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
254     virtio_tswap16s(vdev, &e->flags);
255 }
256 
257 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
258                                         MemoryRegionCache *cache,
259                                         uint16_t off_wrap)
260 {
261     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
262 
263     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
264     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
265 }
266 
267 static void vring_packed_flags_write(VirtIODevice *vdev,
268                                      MemoryRegionCache *cache, uint16_t flags)
269 {
270     hwaddr off = offsetof(VRingPackedDescEvent, flags);
271 
272     virtio_stw_phys_cached(vdev, cache, off, flags);
273     address_space_cache_invalidate(cache, off, sizeof(flags));
274 }
275 
276 /* Called within rcu_read_lock().  */
277 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
278 {
279     return qatomic_rcu_read(&vq->vring.caches);
280 }
281 
282 /* Called within rcu_read_lock().  */
283 static inline uint16_t vring_avail_flags(VirtQueue *vq)
284 {
285     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
286     hwaddr pa = offsetof(VRingAvail, flags);
287 
288     if (!caches) {
289         return 0;
290     }
291 
292     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
293 }
294 
295 /* Called within rcu_read_lock().  */
296 static inline uint16_t vring_avail_idx(VirtQueue *vq)
297 {
298     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
299     hwaddr pa = offsetof(VRingAvail, idx);
300 
301     if (!caches) {
302         return 0;
303     }
304 
305     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
306     return vq->shadow_avail_idx;
307 }
308 
309 /* Called within rcu_read_lock().  */
310 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
311 {
312     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
313     hwaddr pa = offsetof(VRingAvail, ring[i]);
314 
315     if (!caches) {
316         return 0;
317     }
318 
319     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
320 }
321 
322 /* Called within rcu_read_lock().  */
323 static inline uint16_t vring_get_used_event(VirtQueue *vq)
324 {
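    /* With EVENT_IDX, used_event is stored right after the avail ring entries. */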
325     return vring_avail_ring(vq, vq->vring.num);
326 }
327 
328 /* Called within rcu_read_lock().  */
329 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
330                                     int i)
331 {
332     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
333     hwaddr pa = offsetof(VRingUsed, ring[i]);
334 
335     if (!caches) {
336         return;
337     }
338 
339     virtio_tswap32s(vq->vdev, &uelem->id);
340     virtio_tswap32s(vq->vdev, &uelem->len);
341     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
342     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
343 }
344 
345 /* Called within rcu_read_lock().  */
346 static uint16_t vring_used_idx(VirtQueue *vq)
347 {
348     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
349     hwaddr pa = offsetof(VRingUsed, idx);
350 
351     if (!caches) {
352         return 0;
353     }
354 
355     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
356 }
357 
358 /* Called within rcu_read_lock().  */
359 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
360 {
361     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
362     hwaddr pa = offsetof(VRingUsed, idx);
363 
364     if (caches) {
365         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
366         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
367     }
368 
369     vq->used_idx = val;
370 }
371 
372 /* Called within rcu_read_lock().  */
373 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
374 {
375     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
376     VirtIODevice *vdev = vq->vdev;
377     hwaddr pa = offsetof(VRingUsed, flags);
378     uint16_t flags;
379 
380     if (!caches) {
381         return;
382     }
383 
384     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
385     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
386     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
387 }
388 
389 /* Called within rcu_read_lock().  */
390 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
391 {
392     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393     VirtIODevice *vdev = vq->vdev;
394     hwaddr pa = offsetof(VRingUsed, flags);
395     uint16_t flags;
396 
397     if (!caches) {
398         return;
399     }
400 
401     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
402     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
403     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
404 }
405 
406 /* Called within rcu_read_lock().  */
407 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
408 {
409     VRingMemoryRegionCaches *caches;
410     hwaddr pa;
411     if (!vq->notification) {
412         return;
413     }
414 
415     caches = vring_get_region_caches(vq);
416     if (!caches) {
417         return;
418     }
419 
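    /* With EVENT_IDX, avail_event is stored right after the used ring entries. */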
420     pa = offsetof(VRingUsed, ring[vq->vring.num]);
421     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
422     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
423 }
424 
425 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
426 {
427     RCU_READ_LOCK_GUARD();
428 
429     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
430         vring_set_avail_event(vq, vring_avail_idx(vq));
431     } else if (enable) {
432         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
433     } else {
434         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
435     }
436     if (enable) {
437         /* Expose avail event/used flags before caller checks the avail idx. */
438         smp_mb();
439     }
440 }
441 
442 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
443 {
444     uint16_t off_wrap;
445     VRingPackedDescEvent e;
446     VRingMemoryRegionCaches *caches;
447 
448     RCU_READ_LOCK_GUARD();
449     caches = vring_get_region_caches(vq);
450     if (!caches) {
451         return;
452     }
453 
454     vring_packed_event_read(vq->vdev, &caches->used, &e);
455 
456     if (!enable) {
457         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
458     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
459         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
460         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
461         /* Make sure off_wrap is written before flags */
462         smp_wmb();
463         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
464     } else {
465         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
466     }
467 
468     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
469     if (enable) {
470         /* Expose avail event/used flags before caller checks the avail idx. */
471         smp_mb();
472     }
473 }
474 
475 bool virtio_queue_get_notification(VirtQueue *vq)
476 {
477     return vq->notification;
478 }
479 
480 void virtio_queue_set_notification(VirtQueue *vq, int enable)
481 {
482     vq->notification = enable;
483 
484     if (!vq->vring.desc) {
485         return;
486     }
487 
488     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
489         virtio_queue_packed_set_notification(vq, enable);
490     } else {
491         virtio_queue_split_set_notification(vq, enable);
492     }
493 }
494 
495 int virtio_queue_ready(VirtQueue *vq)
496 {
497     return vq->vring.avail != 0;
498 }
499 
500 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
501                                          uint16_t *flags,
502                                          MemoryRegionCache *cache,
503                                          int i)
504 {
505     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
506 
507     *flags = virtio_lduw_phys_cached(vdev, cache, off);
508 }
509 
510 static void vring_packed_desc_read(VirtIODevice *vdev,
511                                    VRingPackedDesc *desc,
512                                    MemoryRegionCache *cache,
513                                    int i, bool strict_order)
514 {
515     hwaddr off = i * sizeof(VRingPackedDesc);
516 
517     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
518 
519     if (strict_order) {
520         /* Make sure flags is read before the rest of the fields. */
521         smp_rmb();
522     }
523 
524     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
525                               &desc->addr, sizeof(desc->addr));
526     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
527                               &desc->id, sizeof(desc->id));
528     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
529                               &desc->len, sizeof(desc->len));
530     virtio_tswap64s(vdev, &desc->addr);
531     virtio_tswap16s(vdev, &desc->id);
532     virtio_tswap32s(vdev, &desc->len);
533 }
534 
535 static void vring_packed_desc_write_data(VirtIODevice *vdev,
536                                          VRingPackedDesc *desc,
537                                          MemoryRegionCache *cache,
538                                          int i)
539 {
540     hwaddr off_id = i * sizeof(VRingPackedDesc) +
541                     offsetof(VRingPackedDesc, id);
542     hwaddr off_len = i * sizeof(VRingPackedDesc) +
543                     offsetof(VRingPackedDesc, len);
544 
545     virtio_tswap32s(vdev, &desc->len);
546     virtio_tswap16s(vdev, &desc->id);
547     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
548     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
549     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
550     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
551 }
552 
553 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
554                                           VRingPackedDesc *desc,
555                                           MemoryRegionCache *cache,
556                                           int i)
557 {
558     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
559 
560     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
561     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
562 }
563 
564 static void vring_packed_desc_write(VirtIODevice *vdev,
565                                     VRingPackedDesc *desc,
566                                     MemoryRegionCache *cache,
567                                     int i, bool strict_order)
568 {
569     vring_packed_desc_write_data(vdev, desc, cache, i);
570     if (strict_order) {
571         /* Make sure data is written before flags. */
572         smp_wmb();
573     }
574     vring_packed_desc_write_flags(vdev, desc, cache, i);
575 }
576 
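/*
 * A packed descriptor is available when its AVAIL bit differs from its
 * USED bit and the AVAIL bit matches the driver's current wrap counter.
 */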
577 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
578 {
579     bool avail, used;
580 
581     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
582     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
583     return (avail != used) && (avail == wrap_counter);
584 }
585 
586 /* Fetch avail_idx from VQ memory only when we really need to know
587  * whether the guest has added some buffers.
588  * Called within rcu_read_lock().  */
589 static int virtio_queue_empty_rcu(VirtQueue *vq)
590 {
591     if (virtio_device_disabled(vq->vdev)) {
592         return 1;
593     }
594 
595     if (unlikely(!vq->vring.avail)) {
596         return 1;
597     }
598 
599     if (vq->shadow_avail_idx != vq->last_avail_idx) {
600         return 0;
601     }
602 
603     return vring_avail_idx(vq) == vq->last_avail_idx;
604 }
605 
606 static int virtio_queue_split_empty(VirtQueue *vq)
607 {
608     bool empty;
609 
610     if (virtio_device_disabled(vq->vdev)) {
611         return 1;
612     }
613 
614     if (unlikely(!vq->vring.avail)) {
615         return 1;
616     }
617 
618     if (vq->shadow_avail_idx != vq->last_avail_idx) {
619         return 0;
620     }
621 
622     RCU_READ_LOCK_GUARD();
623     empty = vring_avail_idx(vq) == vq->last_avail_idx;
624     return empty;
625 }
626 
627 /* Called within rcu_read_lock().  */
628 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
629 {
630     struct VRingPackedDesc desc;
631     VRingMemoryRegionCaches *cache;
632 
633     if (unlikely(!vq->vring.desc)) {
634         return 1;
635     }
636 
637     cache = vring_get_region_caches(vq);
638     if (!cache) {
639         return 1;
640     }
641 
642     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
643                                  vq->last_avail_idx);
644 
645     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
646 }
647 
648 static int virtio_queue_packed_empty(VirtQueue *vq)
649 {
650     RCU_READ_LOCK_GUARD();
651     return virtio_queue_packed_empty_rcu(vq);
652 }
653 
654 int virtio_queue_empty(VirtQueue *vq)
655 {
656     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
657         return virtio_queue_packed_empty(vq);
658     } else {
659         return virtio_queue_split_empty(vq);
660     }
661 }
662 
663 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
664                                unsigned int len)
665 {
666     AddressSpace *dma_as = vq->vdev->dma_as;
667     unsigned int offset;
668     int i;
669 
670     offset = 0;
671     for (i = 0; i < elem->in_num; i++) {
672         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
673 
674         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
675                          elem->in_sg[i].iov_len,
676                          DMA_DIRECTION_FROM_DEVICE, size);
677 
678         offset += size;
679     }
680 
681     for (i = 0; i < elem->out_num; i++)
682         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
683                          elem->out_sg[i].iov_len,
684                          DMA_DIRECTION_TO_DEVICE,
685                          elem->out_sg[i].iov_len);
686 }
687 
688 /* virtqueue_detach_element:
689  * @vq: The #VirtQueue
690  * @elem: The #VirtQueueElement
691  * @len: number of bytes written
692  *
693  * Detach the element from the virtqueue.  This function is suitable for device
694  * reset or other situations where a #VirtQueueElement is simply freed and will
695  * not be pushed or discarded.
696  */
697 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
698                               unsigned int len)
699 {
700     vq->inuse -= elem->ndescs;
701     virtqueue_unmap_sg(vq, elem, len);
702 }
703 
704 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
705 {
706     vq->last_avail_idx -= num;
707 }
708 
709 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
710 {
711     if (vq->last_avail_idx < num) {
712         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
713         vq->last_avail_wrap_counter ^= 1;
714     } else {
715         vq->last_avail_idx -= num;
716     }
717 }
718 
719 /* virtqueue_unpop:
720  * @vq: The #VirtQueue
721  * @elem: The #VirtQueueElement
722  * @len: number of bytes written
723  *
724  * Pretend the most recent element wasn't popped from the virtqueue.  The next
725  * call to virtqueue_pop() will refetch the element.
726  */
727 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
728                      unsigned int len)
729 {
730 
731     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
732         virtqueue_packed_rewind(vq, 1);
733     } else {
734         virtqueue_split_rewind(vq, 1);
735     }
736 
737     virtqueue_detach_element(vq, elem, len);
738 }
739 
740 /* virtqueue_rewind:
741  * @vq: The #VirtQueue
742  * @num: Number of elements to push back
743  *
744  * Pretend that elements weren't popped from the virtqueue.  The next
745  * virtqueue_pop() will refetch the oldest element.
746  *
747  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
748  *
749  * Returns: true on success, false if @num is greater than the number of in use
750  * elements.
751  */
752 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
753 {
754     if (num > vq->inuse) {
755         return false;
756     }
757 
758     vq->inuse -= num;
759     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
760         virtqueue_packed_rewind(vq, num);
761     } else {
762         virtqueue_split_rewind(vq, num);
763     }
764     return true;
765 }
766 
767 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
768                     unsigned int len, unsigned int idx)
769 {
770     VRingUsedElem uelem;
771 
772     if (unlikely(!vq->vring.used)) {
773         return;
774     }
775 
776     idx = (idx + vq->used_idx) % vq->vring.num;
777 
778     uelem.id = elem->index;
779     uelem.len = len;
780     vring_used_write(vq, &uelem, idx);
781 }
782 
783 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
784                                   unsigned int len, unsigned int idx)
785 {
786     vq->used_elems[idx].index = elem->index;
787     vq->used_elems[idx].len = len;
788     vq->used_elems[idx].ndescs = elem->ndescs;
789 }
790 
791 static void virtqueue_packed_fill_desc(VirtQueue *vq,
792                                        const VirtQueueElement *elem,
793                                        unsigned int idx,
794                                        bool strict_order)
795 {
796     uint16_t head;
797     VRingMemoryRegionCaches *caches;
798     VRingPackedDesc desc = {
799         .id = elem->index,
800         .len = elem->len,
801     };
802     bool wrap_counter = vq->used_wrap_counter;
803 
804     if (unlikely(!vq->vring.desc)) {
805         return;
806     }
807 
808     head = vq->used_idx + idx;
809     if (head >= vq->vring.num) {
810         head -= vq->vring.num;
811         wrap_counter ^= 1;
812     }
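    /*
     * Mark the descriptor as used: with the device wrap counter set, both
     * the AVAIL and USED bits are set; otherwise both are cleared.
     */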
813     if (wrap_counter) {
814         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
815         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
816     } else {
817         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
818         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
819     }
820 
821     caches = vring_get_region_caches(vq);
822     if (!caches) {
823         return;
824     }
825 
826     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
827 }
828 
829 /* Called within rcu_read_lock().  */
830 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
831                     unsigned int len, unsigned int idx)
832 {
833     trace_virtqueue_fill(vq, elem, len, idx);
834 
835     virtqueue_unmap_sg(vq, elem, len);
836 
837     if (virtio_device_disabled(vq->vdev)) {
838         return;
839     }
840 
841     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
842         virtqueue_packed_fill(vq, elem, len, idx);
843     } else {
844         virtqueue_split_fill(vq, elem, len, idx);
845     }
846 }
847 
848 /* Called within rcu_read_lock().  */
849 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
850 {
851     uint16_t old, new;
852 
853     if (unlikely(!vq->vring.used)) {
854         return;
855     }
856 
857     /* Make sure buffer is written before we update index. */
858     smp_wmb();
859     trace_virtqueue_flush(vq, count);
860     old = vq->used_idx;
861     new = old + count;
862     vring_used_idx_set(vq, new);
863     vq->inuse -= count;
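    /* If the used index just moved past signalled_used, the cached value is stale. */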
864     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
865         vq->signalled_used_valid = false;
866 }
867 
868 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
869 {
870     unsigned int i, ndescs = 0;
871 
872     if (unlikely(!vq->vring.desc)) {
873         return;
874     }
875 
876     for (i = 1; i < count; i++) {
877         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
878         ndescs += vq->used_elems[i].ndescs;
879     }
880     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
881     ndescs += vq->used_elems[0].ndescs;
882 
883     vq->inuse -= ndescs;
884     vq->used_idx += ndescs;
885     if (vq->used_idx >= vq->vring.num) {
886         vq->used_idx -= vq->vring.num;
887         vq->used_wrap_counter ^= 1;
888         vq->signalled_used_valid = false;
889     }
890 }
891 
892 void virtqueue_flush(VirtQueue *vq, unsigned int count)
893 {
894     if (virtio_device_disabled(vq->vdev)) {
895         vq->inuse -= count;
896         return;
897     }
898 
899     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
900         virtqueue_packed_flush(vq, count);
901     } else {
902         virtqueue_split_flush(vq, count);
903     }
904 }
905 
906 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
907                     unsigned int len)
908 {
909     RCU_READ_LOCK_GUARD();
910     virtqueue_fill(vq, elem, len, 0);
911     virtqueue_flush(vq, 1);
912 }
913 
914 /* Called within rcu_read_lock().  */
915 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
916 {
917     uint16_t num_heads = vring_avail_idx(vq) - idx;
918 
919     /* Check it isn't doing very strange things with descriptor numbers. */
920     if (num_heads > vq->vring.num) {
921         virtio_error(vq->vdev, "Guest moved avail index from %u to %u",
922                      idx, vq->shadow_avail_idx);
923         return -EINVAL;
924     }
925     /* On success, callers read a descriptor at vq->last_avail_idx.
926      * Make sure descriptor read does not bypass avail index read. */
927     if (num_heads) {
928         smp_rmb();
929     }
930 
931     return num_heads;
932 }
933 
934 /* Called within rcu_read_lock().  */
935 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
936                                unsigned int *head)
937 {
938     /* Grab the next descriptor number they're advertising, and increment
939      * the index we've seen. */
940     *head = vring_avail_ring(vq, idx % vq->vring.num);
941 
942     /* If their number is silly, that's a fatal mistake. */
943     if (*head >= vq->vring.num) {
944         virtio_error(vq->vdev, "Guest says index %u is available", *head);
945         return false;
946     }
947 
948     return true;
949 }
950 
951 enum {
952     VIRTQUEUE_READ_DESC_ERROR = -1,
953     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
954     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
955 };
956 
957 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
958                                           MemoryRegionCache *desc_cache,
959                                           unsigned int max, unsigned int *next)
960 {
961     /* If this descriptor says it doesn't chain, we're done. */
962     if (!(desc->flags & VRING_DESC_F_NEXT)) {
963         return VIRTQUEUE_READ_DESC_DONE;
964     }
965 
966     /* Check they're not leading us off the end of the descriptors. */
967     *next = desc->next;
968     /* Make sure compiler knows to grab that: we don't want it changing! */
969     smp_wmb();
970 
971     if (*next >= max) {
972         virtio_error(vdev, "Desc next is %u", *next);
973         return VIRTQUEUE_READ_DESC_ERROR;
974     }
975 
976     vring_split_desc_read(vdev, desc, desc_cache, *next);
977     return VIRTQUEUE_READ_DESC_MORE;
978 }
979 
980 /* Called within rcu_read_lock().  */
981 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
982                             unsigned int *in_bytes, unsigned int *out_bytes,
983                             unsigned max_in_bytes, unsigned max_out_bytes,
984                             VRingMemoryRegionCaches *caches)
985 {
986     VirtIODevice *vdev = vq->vdev;
987     unsigned int max, idx;
988     unsigned int total_bufs, in_total, out_total;
989     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
990     int64_t len = 0;
991     int rc;
992 
993     idx = vq->last_avail_idx;
994     total_bufs = in_total = out_total = 0;
995 
996     max = vq->vring.num;
997 
998     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
999         MemoryRegionCache *desc_cache = &caches->desc;
1000         unsigned int num_bufs;
1001         VRingDesc desc;
1002         unsigned int i;
1003 
1004         num_bufs = total_bufs;
1005 
1006         if (!virtqueue_get_head(vq, idx++, &i)) {
1007             goto err;
1008         }
1009 
1010         vring_split_desc_read(vdev, &desc, desc_cache, i);
1011 
1012         if (desc.flags & VRING_DESC_F_INDIRECT) {
1013             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1014                 virtio_error(vdev, "Invalid size for indirect buffer table");
1015                 goto err;
1016             }
1017 
1018             /* If we've got too many, that implies a descriptor loop. */
1019             if (num_bufs >= max) {
1020                 virtio_error(vdev, "Looped descriptor");
1021                 goto err;
1022             }
1023 
1024             /* loop over the indirect descriptor table */
1025             len = address_space_cache_init(&indirect_desc_cache,
1026                                            vdev->dma_as,
1027                                            desc.addr, desc.len, false);
1028             desc_cache = &indirect_desc_cache;
1029             if (len < desc.len) {
1030                 virtio_error(vdev, "Cannot map indirect buffer");
1031                 goto err;
1032             }
1033 
1034             max = desc.len / sizeof(VRingDesc);
1035             num_bufs = i = 0;
1036             vring_split_desc_read(vdev, &desc, desc_cache, i);
1037         }
1038 
1039         do {
1040             /* If we've got too many, that implies a descriptor loop. */
1041             if (++num_bufs > max) {
1042                 virtio_error(vdev, "Looped descriptor");
1043                 goto err;
1044             }
1045 
1046             if (desc.flags & VRING_DESC_F_WRITE) {
1047                 in_total += desc.len;
1048             } else {
1049                 out_total += desc.len;
1050             }
1051             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1052                 goto done;
1053             }
1054 
1055             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1056         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1057 
1058         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1059             goto err;
1060         }
1061 
1062         if (desc_cache == &indirect_desc_cache) {
1063             address_space_cache_destroy(&indirect_desc_cache);
1064             total_bufs++;
1065         } else {
1066             total_bufs = num_bufs;
1067         }
1068     }
1069 
1070     if (rc < 0) {
1071         goto err;
1072     }
1073 
1074 done:
1075     address_space_cache_destroy(&indirect_desc_cache);
1076     if (in_bytes) {
1077         *in_bytes = in_total;
1078     }
1079     if (out_bytes) {
1080         *out_bytes = out_total;
1081     }
1082     return;
1083 
1084 err:
1085     in_total = out_total = 0;
1086     goto done;
1087 }
1088 
1089 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1090                                            VRingPackedDesc *desc,
1091                                            MemoryRegionCache
1092                                            *desc_cache,
1093                                            unsigned int max,
1094                                            unsigned int *next,
1095                                            bool indirect)
1096 {
1097     /* If this descriptor says it doesn't chain, we're done. */
1098     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1099         return VIRTQUEUE_READ_DESC_DONE;
1100     }
1101 
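    /* Advance to the next descriptor: the main ring wraps at vring.num,
     * while an indirect table simply ends. */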
1102     ++*next;
1103     if (*next == max) {
1104         if (indirect) {
1105             return VIRTQUEUE_READ_DESC_DONE;
1106         } else {
1107             (*next) -= vq->vring.num;
1108         }
1109     }
1110 
1111     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1112     return VIRTQUEUE_READ_DESC_MORE;
1113 }
1114 
1115 /* Called within rcu_read_lock().  */
1116 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1117                                              unsigned int *in_bytes,
1118                                              unsigned int *out_bytes,
1119                                              unsigned max_in_bytes,
1120                                              unsigned max_out_bytes,
1121                                              VRingMemoryRegionCaches *caches)
1122 {
1123     VirtIODevice *vdev = vq->vdev;
1124     unsigned int max, idx;
1125     unsigned int total_bufs, in_total, out_total;
1126     MemoryRegionCache *desc_cache;
1127     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1128     int64_t len = 0;
1129     VRingPackedDesc desc;
1130     bool wrap_counter;
1131 
1132     idx = vq->last_avail_idx;
1133     wrap_counter = vq->last_avail_wrap_counter;
1134     total_bufs = in_total = out_total = 0;
1135 
1136     max = vq->vring.num;
1137 
1138     for (;;) {
1139         unsigned int num_bufs = total_bufs;
1140         unsigned int i = idx;
1141         int rc;
1142 
1143         desc_cache = &caches->desc;
1144         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1145         if (!is_desc_avail(desc.flags, wrap_counter)) {
1146             break;
1147         }
1148 
1149         if (desc.flags & VRING_DESC_F_INDIRECT) {
1150             if (desc.len % sizeof(VRingPackedDesc)) {
1151                 virtio_error(vdev, "Invalid size for indirect buffer table");
1152                 goto err;
1153             }
1154 
1155             /* If we've got too many, that implies a descriptor loop. */
1156             if (num_bufs >= max) {
1157                 virtio_error(vdev, "Looped descriptor");
1158                 goto err;
1159             }
1160 
1161             /* loop over the indirect descriptor table */
1162             len = address_space_cache_init(&indirect_desc_cache,
1163                                            vdev->dma_as,
1164                                            desc.addr, desc.len, false);
1165             desc_cache = &indirect_desc_cache;
1166             if (len < desc.len) {
1167                 virtio_error(vdev, "Cannot map indirect buffer");
1168                 goto err;
1169             }
1170 
1171             max = desc.len / sizeof(VRingPackedDesc);
1172             num_bufs = i = 0;
1173             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1174         }
1175 
1176         do {
1177             /* If we've got too many, that implies a descriptor loop. */
1178             if (++num_bufs > max) {
1179                 virtio_error(vdev, "Looped descriptor");
1180                 goto err;
1181             }
1182 
1183             if (desc.flags & VRING_DESC_F_WRITE) {
1184                 in_total += desc.len;
1185             } else {
1186                 out_total += desc.len;
1187             }
1188             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1189                 goto done;
1190             }
1191 
1192             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1193                                                  &i, desc_cache ==
1194                                                  &indirect_desc_cache);
1195         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1196 
1197         if (desc_cache == &indirect_desc_cache) {
1198             address_space_cache_destroy(&indirect_desc_cache);
1199             total_bufs++;
1200             idx++;
1201         } else {
1202             idx += num_bufs - total_bufs;
1203             total_bufs = num_bufs;
1204         }
1205 
1206         if (idx >= vq->vring.num) {
1207             idx -= vq->vring.num;
1208             wrap_counter ^= 1;
1209         }
1210     }
1211 
1212     /* Record the index and wrap counter for a kick we want */
1213     vq->shadow_avail_idx = idx;
1214     vq->shadow_avail_wrap_counter = wrap_counter;
1215 done:
1216     address_space_cache_destroy(&indirect_desc_cache);
1217     if (in_bytes) {
1218         *in_bytes = in_total;
1219     }
1220     if (out_bytes) {
1221         *out_bytes = out_total;
1222     }
1223     return;
1224 
1225 err:
1226     in_total = out_total = 0;
1227     goto done;
1228 }
1229 
1230 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1231                                unsigned int *out_bytes,
1232                                unsigned max_in_bytes, unsigned max_out_bytes)
1233 {
1234     uint16_t desc_size;
1235     VRingMemoryRegionCaches *caches;
1236 
1237     RCU_READ_LOCK_GUARD();
1238 
1239     if (unlikely(!vq->vring.desc)) {
1240         goto err;
1241     }
1242 
1243     caches = vring_get_region_caches(vq);
1244     if (!caches) {
1245         goto err;
1246     }
1247 
1248     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1249                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1250     if (caches->desc.len < vq->vring.num * desc_size) {
1251         virtio_error(vq->vdev, "Cannot map descriptor ring");
1252         goto err;
1253     }
1254 
1255     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1256         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1257                                          max_in_bytes, max_out_bytes,
1258                                          caches);
1259     } else {
1260         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1261                                         max_in_bytes, max_out_bytes,
1262                                         caches);
1263     }
1264 
1265     return;
1266 err:
1267     if (in_bytes) {
1268         *in_bytes = 0;
1269     }
1270     if (out_bytes) {
1271         *out_bytes = 0;
1272     }
1273 }
1274 
1275 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1276                           unsigned int out_bytes)
1277 {
1278     unsigned int in_total, out_total;
1279 
1280     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1281     return in_bytes <= in_total && out_bytes <= out_total;
1282 }
1283 
1284 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1285                                hwaddr *addr, struct iovec *iov,
1286                                unsigned int max_num_sg, bool is_write,
1287                                hwaddr pa, size_t sz)
1288 {
1289     bool ok = false;
1290     unsigned num_sg = *p_num_sg;
1291     assert(num_sg <= max_num_sg);
1292 
1293     if (!sz) {
1294         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1295         goto out;
1296     }
1297 
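    /* dma_memory_map() may map less than requested, so a single descriptor
     * can end up split across several iovec entries. */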
1298     while (sz) {
1299         hwaddr len = sz;
1300 
1301         if (num_sg == max_num_sg) {
1302             virtio_error(vdev, "virtio: too many write descriptors in "
1303                                "indirect table");
1304             goto out;
1305         }
1306 
1307         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1308                                               is_write ?
1309                                               DMA_DIRECTION_FROM_DEVICE :
1310                                               DMA_DIRECTION_TO_DEVICE,
1311                                               MEMTXATTRS_UNSPECIFIED);
1312         if (!iov[num_sg].iov_base) {
1313             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1314             goto out;
1315         }
1316 
1317         iov[num_sg].iov_len = len;
1318         addr[num_sg] = pa;
1319 
1320         sz -= len;
1321         pa += len;
1322         num_sg++;
1323     }
1324     ok = true;
1325 
1326 out:
1327     *p_num_sg = num_sg;
1328     return ok;
1329 }
1330 
1331 /* Only used by error code paths before we have a VirtQueueElement (therefore
1332  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1333  * yet.
1334  */
1335 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1336                                     struct iovec *iov)
1337 {
1338     unsigned int i;
1339 
1340     for (i = 0; i < out_num + in_num; i++) {
1341         int is_write = i >= out_num;
1342 
1343         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1344         iov++;
1345     }
1346 }
1347 
1348 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1349                                 hwaddr *addr, unsigned int num_sg,
1350                                 bool is_write)
1351 {
1352     unsigned int i;
1353     hwaddr len;
1354 
1355     for (i = 0; i < num_sg; i++) {
1356         len = sg[i].iov_len;
1357         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1358                                         addr[i], &len, is_write ?
1359                                         DMA_DIRECTION_FROM_DEVICE :
1360                                         DMA_DIRECTION_TO_DEVICE,
1361                                         MEMTXATTRS_UNSPECIFIED);
1362         if (!sg[i].iov_base) {
1363             error_report("virtio: error trying to map MMIO memory");
1364             exit(1);
1365         }
1366         if (len != sg[i].iov_len) {
1367             error_report("virtio: unexpected memory split");
1368             exit(1);
1369         }
1370     }
1371 }
1372 
1373 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1374 {
1375     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1376     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1377                                                                         false);
1378 }
1379 
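/*
 * Allocate a VirtQueueElement together with its in/out addr and sg arrays
 * in one contiguous block; the arrays follow the caller-sized header (sz).
 */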
1380 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1381 {
1382     VirtQueueElement *elem;
1383     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1384     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1385     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1386     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1387     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1388     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1389 
1390     assert(sz >= sizeof(VirtQueueElement));
1391     elem = g_malloc(out_sg_end);
1392     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1393     elem->out_num = out_num;
1394     elem->in_num = in_num;
1395     elem->in_addr = (void *)elem + in_addr_ofs;
1396     elem->out_addr = (void *)elem + out_addr_ofs;
1397     elem->in_sg = (void *)elem + in_sg_ofs;
1398     elem->out_sg = (void *)elem + out_sg_ofs;
1399     return elem;
1400 }
1401 
1402 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1403 {
1404     unsigned int i, head, max;
1405     VRingMemoryRegionCaches *caches;
1406     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1407     MemoryRegionCache *desc_cache;
1408     int64_t len;
1409     VirtIODevice *vdev = vq->vdev;
1410     VirtQueueElement *elem = NULL;
1411     unsigned out_num, in_num, elem_entries;
1412     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1413     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1414     VRingDesc desc;
1415     int rc;
1416 
1417     RCU_READ_LOCK_GUARD();
1418     if (virtio_queue_empty_rcu(vq)) {
1419         goto done;
1420     }
1421     /* Needed after virtio_queue_empty(), see comment in
1422      * virtqueue_num_heads(). */
1423     smp_rmb();
1424 
1425     /* When we start there are neither input nor output buffers. */
1426     out_num = in_num = elem_entries = 0;
1427 
1428     max = vq->vring.num;
1429 
1430     if (vq->inuse >= vq->vring.num) {
1431         virtio_error(vdev, "Virtqueue size exceeded");
1432         goto done;
1433     }
1434 
1435     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1436         goto done;
1437     }
1438 
1439     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1440         vring_set_avail_event(vq, vq->last_avail_idx);
1441     }
1442 
1443     i = head;
1444 
1445     caches = vring_get_region_caches(vq);
1446     if (!caches) {
1447         virtio_error(vdev, "Region caches not initialized");
1448         goto done;
1449     }
1450 
1451     if (caches->desc.len < max * sizeof(VRingDesc)) {
1452         virtio_error(vdev, "Cannot map descriptor ring");
1453         goto done;
1454     }
1455 
1456     desc_cache = &caches->desc;
1457     vring_split_desc_read(vdev, &desc, desc_cache, i);
1458     if (desc.flags & VRING_DESC_F_INDIRECT) {
1459         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1460             virtio_error(vdev, "Invalid size for indirect buffer table");
1461             goto done;
1462         }
1463 
1464         /* loop over the indirect descriptor table */
1465         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1466                                        desc.addr, desc.len, false);
1467         desc_cache = &indirect_desc_cache;
1468         if (len < desc.len) {
1469             virtio_error(vdev, "Cannot map indirect buffer");
1470             goto done;
1471         }
1472 
1473         max = desc.len / sizeof(VRingDesc);
1474         i = 0;
1475         vring_split_desc_read(vdev, &desc, desc_cache, i);
1476     }
1477 
1478     /* Collect all the descriptors */
1479     do {
1480         bool map_ok;
1481 
1482         if (desc.flags & VRING_DESC_F_WRITE) {
1483             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1484                                         iov + out_num,
1485                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1486                                         desc.addr, desc.len);
1487         } else {
1488             if (in_num) {
1489                 virtio_error(vdev, "Incorrect order for descriptors");
1490                 goto err_undo_map;
1491             }
1492             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1493                                         VIRTQUEUE_MAX_SIZE, false,
1494                                         desc.addr, desc.len);
1495         }
1496         if (!map_ok) {
1497             goto err_undo_map;
1498         }
1499 
1500         /* If we've got too many, that implies a descriptor loop. */
1501         if (++elem_entries > max) {
1502             virtio_error(vdev, "Looped descriptor");
1503             goto err_undo_map;
1504         }
1505 
1506         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1507     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1508 
1509     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1510         goto err_undo_map;
1511     }
1512 
1513     /* Now copy what we have collected and mapped */
1514     elem = virtqueue_alloc_element(sz, out_num, in_num);
1515     elem->index = head;
1516     elem->ndescs = 1;
1517     for (i = 0; i < out_num; i++) {
1518         elem->out_addr[i] = addr[i];
1519         elem->out_sg[i] = iov[i];
1520     }
1521     for (i = 0; i < in_num; i++) {
1522         elem->in_addr[i] = addr[out_num + i];
1523         elem->in_sg[i] = iov[out_num + i];
1524     }
1525 
1526     vq->inuse++;
1527 
1528     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1529 done:
1530     address_space_cache_destroy(&indirect_desc_cache);
1531 
1532     return elem;
1533 
1534 err_undo_map:
1535     virtqueue_undo_map_desc(out_num, in_num, iov);
1536     goto done;
1537 }
1538 
1539 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1540 {
1541     unsigned int i, max;
1542     VRingMemoryRegionCaches *caches;
1543     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1544     MemoryRegionCache *desc_cache;
1545     int64_t len;
1546     VirtIODevice *vdev = vq->vdev;
1547     VirtQueueElement *elem = NULL;
1548     unsigned out_num, in_num, elem_entries;
1549     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1550     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1551     VRingPackedDesc desc;
1552     uint16_t id;
1553     int rc;
1554 
1555     RCU_READ_LOCK_GUARD();
1556     if (virtio_queue_packed_empty_rcu(vq)) {
1557         goto done;
1558     }
1559 
1560     /* When we start there are none of either input nor output. */
1561     /* When we start there are neither input nor output buffers. */
1562 
1563     max = vq->vring.num;
1564 
1565     if (vq->inuse >= vq->vring.num) {
1566         virtio_error(vdev, "Virtqueue size exceeded");
1567         goto done;
1568     }
1569 
1570     i = vq->last_avail_idx;
1571 
1572     caches = vring_get_region_caches(vq);
1573     if (!caches) {
1574         virtio_error(vdev, "Region caches not initialized");
1575         goto done;
1576     }
1577 
1578     if (caches->desc.len < max * sizeof(VRingDesc)) {
1579         virtio_error(vdev, "Cannot map descriptor ring");
1580         goto done;
1581     }
1582 
1583     desc_cache = &caches->desc;
1584     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1585     id = desc.id;
1586     if (desc.flags & VRING_DESC_F_INDIRECT) {
1587         if (desc.len % sizeof(VRingPackedDesc)) {
1588             virtio_error(vdev, "Invalid size for indirect buffer table");
1589             goto done;
1590         }
1591 
1592         /* loop over the indirect descriptor table */
1593         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1594                                        desc.addr, desc.len, false);
1595         desc_cache = &indirect_desc_cache;
1596         if (len < desc.len) {
1597             virtio_error(vdev, "Cannot map indirect buffer");
1598             goto done;
1599         }
1600 
1601         max = desc.len / sizeof(VRingPackedDesc);
1602         i = 0;
1603         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1604     }
1605 
1606     /* Collect all the descriptors */
1607     do {
1608         bool map_ok;
1609 
1610         if (desc.flags & VRING_DESC_F_WRITE) {
1611             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1612                                         iov + out_num,
1613                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1614                                         desc.addr, desc.len);
1615         } else {
1616             if (in_num) {
1617                 virtio_error(vdev, "Incorrect order for descriptors");
1618                 goto err_undo_map;
1619             }
1620             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1621                                         VIRTQUEUE_MAX_SIZE, false,
1622                                         desc.addr, desc.len);
1623         }
1624         if (!map_ok) {
1625             goto err_undo_map;
1626         }
1627 
1628         /* If we've got too many, that implies a descriptor loop. */
1629         if (++elem_entries > max) {
1630             virtio_error(vdev, "Looped descriptor");
1631             goto err_undo_map;
1632         }
1633 
1634         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1635                                              desc_cache ==
1636                                              &indirect_desc_cache);
1637     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1638 
1639     /* Now copy what we have collected and mapped */
1640     elem = virtqueue_alloc_element(sz, out_num, in_num);
1641     for (i = 0; i < out_num; i++) {
1642         elem->out_addr[i] = addr[i];
1643         elem->out_sg[i] = iov[i];
1644     }
1645     for (i = 0; i < in_num; i++) {
1646         elem->in_addr[i] = addr[out_num + i];
1647         elem->in_sg[i] = iov[out_num + i];
1648     }
1649 
1650     elem->index = id;
1651     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1652     vq->last_avail_idx += elem->ndescs;
1653     vq->inuse += elem->ndescs;
1654 
1655     if (vq->last_avail_idx >= vq->vring.num) {
1656         vq->last_avail_idx -= vq->vring.num;
1657         vq->last_avail_wrap_counter ^= 1;
1658     }
1659 
1660     vq->shadow_avail_idx = vq->last_avail_idx;
1661     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1662 
1663     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1664 done:
1665     address_space_cache_destroy(&indirect_desc_cache);
1666 
1667     return elem;
1668 
1669 err_undo_map:
1670     virtqueue_undo_map_desc(out_num, in_num, iov);
1671     goto done;
1672 }
1673 
1674 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1675 {
1676     if (virtio_device_disabled(vq->vdev)) {
1677         return NULL;
1678     }
1679 
1680     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1681         return virtqueue_packed_pop(vq, sz);
1682     } else {
1683         return virtqueue_split_pop(vq, sz);
1684     }
1685 }
1686 
1687 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1688 {
1689     VRingMemoryRegionCaches *caches;
1690     MemoryRegionCache *desc_cache;
1691     unsigned int dropped = 0;
1692     VirtQueueElement elem = {};
1693     VirtIODevice *vdev = vq->vdev;
1694     VRingPackedDesc desc;
1695 
1696     RCU_READ_LOCK_GUARD();
1697 
1698     caches = vring_get_region_caches(vq);
1699     if (!caches) {
1700         return 0;
1701     }
1702 
1703     desc_cache = &caches->desc;
1704 
1705     virtio_queue_set_notification(vq, 0);
1706 
1707     while (vq->inuse < vq->vring.num) {
1708         unsigned int idx = vq->last_avail_idx;
1709         /*
1710          * works similarly to virtqueue_pop but does not map buffers
1711          * and does not allocate any memory.
1712          */
1713         vring_packed_desc_read(vdev, &desc, desc_cache,
1714                                vq->last_avail_idx, true);
1715         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1716             break;
1717         }
1718         elem.index = desc.id;
1719         elem.ndescs = 1;
1720         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1721                                                vq->vring.num, &idx, false)) {
1722             ++elem.ndescs;
1723         }
1724         /*
1725          * immediately push the element, nothing to unmap
1726          * as both in_num and out_num are set to 0.
1727          */
1728         virtqueue_push(vq, &elem, 0);
1729         dropped++;
1730         vq->last_avail_idx += elem.ndescs;
1731         if (vq->last_avail_idx >= vq->vring.num) {
1732             vq->last_avail_idx -= vq->vring.num;
1733             vq->last_avail_wrap_counter ^= 1;
1734         }
1735     }
1736 
1737     return dropped;
1738 }
1739 
1740 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1741 {
1742     unsigned int dropped = 0;
1743     VirtQueueElement elem = {};
1744     VirtIODevice *vdev = vq->vdev;
1745     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1746 
1747     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1748         /* works similarly to virtqueue_pop but does not map buffers
1749          * and does not allocate any memory */
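        /*
         * Pairs with the guest publishing avail->idx: make sure the avail
         * ring entries are read only after the emptiness check above.
         */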
1750         smp_rmb();
1751         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1752             break;
1753         }
1754         vq->inuse++;
1755         vq->last_avail_idx++;
1756         if (fEventIdx) {
1757             vring_set_avail_event(vq, vq->last_avail_idx);
1758         }
1759         /* immediately push the element, nothing to unmap
1760          * as both in_num and out_num are set to 0 */
1761         virtqueue_push(vq, &elem, 0);
1762         dropped++;
1763     }
1764 
1765     return dropped;
1766 }
1767 
1768 /* virtqueue_drop_all:
1769  * @vq: The #VirtQueue
1770  * Drops all queued buffers and indicates them to the guest
1771  * as if they are done. Useful when buffers can not be
1772  * processed but must be returned to the guest.
1773  */
1774 unsigned int virtqueue_drop_all(VirtQueue *vq)
1775 {
1776     struct VirtIODevice *vdev = vq->vdev;
1777 
1778     if (virtio_device_disabled(vq->vdev)) {
1779         return 0;
1780     }
1781 
1782     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1783         return virtqueue_packed_drop_all(vq);
1784     } else {
1785         return virtqueue_split_drop_all(vq);
1786     }
1787 }
1788 
1789 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1790  * it is what QEMU has always done by mistake.  We can change it sooner
1791  * or later by bumping the version number of the affected vm states.
1792  * In the meanwhile, since the in-memory layout of VirtQueueElement
1793  * has changed, we need to marshal to and from the layout that was
1794  * used before the change.
1795  */
1796 typedef struct VirtQueueElementOld {
1797     unsigned int index;
1798     unsigned int out_num;
1799     unsigned int in_num;
1800     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1801     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1802     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1803     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1804 } VirtQueueElementOld;
1805 
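/*
 * The migration stream carries the raw VirtQueueElementOld structure; for
 * packed-ring devices an additional big-endian 32-bit ndescs value is
 * carried as well (see qemu_put_virtqueue_element()).
 */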
1806 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1807 {
1808     VirtQueueElement *elem;
1809     VirtQueueElementOld data;
1810     int i;
1811 
1812     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1813 
1814     /* TODO: teach all callers that this can fail, and return failure instead
1815      * of asserting here.
1816      * This is just one thing (there are probably more) that must be
1817      * fixed before we can allow NDEBUG compilation.
1818      */
1819     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1820     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1821 
1822     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1823     elem->index = data.index;
1824 
1825     for (i = 0; i < elem->in_num; i++) {
1826         elem->in_addr[i] = data.in_addr[i];
1827     }
1828 
1829     for (i = 0; i < elem->out_num; i++) {
1830         elem->out_addr[i] = data.out_addr[i];
1831     }
1832 
1833     for (i = 0; i < elem->in_num; i++) {
1834         /* Base is overwritten by virtqueue_map.  */
1835         elem->in_sg[i].iov_base = 0;
1836         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1837     }
1838 
1839     for (i = 0; i < elem->out_num; i++) {
1840         /* Base is overwritten by virtqueue_map.  */
1841         elem->out_sg[i].iov_base = 0;
1842         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1843     }
1844 
1845     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1846         qemu_get_be32s(f, &elem->ndescs);
1847     }
1848 
1849     virtqueue_map(vdev, elem);
1850     return elem;
1851 }
1852 
1853 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1854                                 VirtQueueElement *elem)
1855 {
1856     VirtQueueElementOld data;
1857     int i;
1858 
1859     memset(&data, 0, sizeof(data));
1860     data.index = elem->index;
1861     data.in_num = elem->in_num;
1862     data.out_num = elem->out_num;
1863 
1864     for (i = 0; i < elem->in_num; i++) {
1865         data.in_addr[i] = elem->in_addr[i];
1866     }
1867 
1868     for (i = 0; i < elem->out_num; i++) {
1869         data.out_addr[i] = elem->out_addr[i];
1870     }
1871 
1872     for (i = 0; i < elem->in_num; i++) {
1873         /* Base is overwritten by virtqueue_map when loading.  Do not
1874          * save it, as it would leak the QEMU address space layout.  */
1875         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1876     }
1877 
1878     for (i = 0; i < elem->out_num; i++) {
1879         /* Do not save iov_base as above.  */
1880         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1881     }
1882 
1883     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1884         qemu_put_be32s(f, &elem->ndescs);
1885     }
1886 
1887     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1888 }
1889 
1890 /* virtio device */
1891 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1892 {
1893     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1894     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1895 
1896     if (virtio_device_disabled(vdev)) {
1897         return;
1898     }
1899 
1900     if (k->notify) {
1901         k->notify(qbus->parent, vector);
1902     }
1903 }
1904 
1905 void virtio_update_irq(VirtIODevice *vdev)
1906 {
1907     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1908 }
1909 
1910 static int virtio_validate_features(VirtIODevice *vdev)
1911 {
1912     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1913 
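    /*
     * A device that offers VIRTIO_F_IOMMU_PLATFORM requires the driver to
     * accept it; refusing it would let the device DMA to untranslated
     * addresses and bypass the platform IOMMU, so fail negotiation here.
     */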
1914     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1915         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1916         return -EFAULT;
1917     }
1918 
1919     if (k->validate_features) {
1920         return k->validate_features(vdev);
1921     } else {
1922         return 0;
1923     }
1924 }
1925 
1926 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1927 {
1928     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1929     trace_virtio_set_status(vdev, val);
1930 
1931     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1932         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1933             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1934             int ret = virtio_validate_features(vdev);
1935 
1936             if (ret) {
1937                 return ret;
1938             }
1939         }
1940     }
1941 
1942     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1943         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1944         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1945     }
1946 
1947     if (k->set_status) {
1948         k->set_status(vdev, val);
1949     }
1950     vdev->status = val;
1951 
1952     return 0;
1953 }
1954 
1955 static enum virtio_device_endian virtio_default_endian(void)
1956 {
1957     if (target_words_bigendian()) {
1958         return VIRTIO_DEVICE_ENDIAN_BIG;
1959     } else {
1960         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1961     }
1962 }
1963 
1964 static enum virtio_device_endian virtio_current_cpu_endian(void)
1965 {
1966     if (cpu_virtio_is_big_endian(current_cpu)) {
1967         return VIRTIO_DEVICE_ENDIAN_BIG;
1968     } else {
1969         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970     }
1971 }
1972 
1973 void virtio_reset(void *opaque)
1974 {
1975     VirtIODevice *vdev = opaque;
1976     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1977     int i;
1978 
1979     virtio_set_status(vdev, 0);
1980     if (current_cpu) {
1981         /* Guest initiated reset */
1982         vdev->device_endian = virtio_current_cpu_endian();
1983     } else {
1984         /* System reset */
1985         vdev->device_endian = virtio_default_endian();
1986     }
1987 
1988     if (k->reset) {
1989         k->reset(vdev);
1990     }
1991 
1992     vdev->start_on_kick = false;
1993     vdev->started = false;
1994     vdev->broken = false;
1995     vdev->guest_features = 0;
1996     vdev->queue_sel = 0;
1997     vdev->status = 0;
1998     vdev->disabled = false;
1999     qatomic_set(&vdev->isr, 0);
2000     vdev->config_vector = VIRTIO_NO_VECTOR;
2001     virtio_notify_vector(vdev, vdev->config_vector);
2002 
2003     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2004         vdev->vq[i].vring.desc = 0;
2005         vdev->vq[i].vring.avail = 0;
2006         vdev->vq[i].vring.used = 0;
2007         vdev->vq[i].last_avail_idx = 0;
2008         vdev->vq[i].shadow_avail_idx = 0;
2009         vdev->vq[i].used_idx = 0;
2010         vdev->vq[i].last_avail_wrap_counter = true;
2011         vdev->vq[i].shadow_avail_wrap_counter = true;
2012         vdev->vq[i].used_wrap_counter = true;
2013         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2014         vdev->vq[i].signalled_used = 0;
2015         vdev->vq[i].signalled_used_valid = false;
2016         vdev->vq[i].notification = true;
2017         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2018         vdev->vq[i].inuse = 0;
2019         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2020     }
2021 }
2022 
2023 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2024 {
2025     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2026     uint8_t val;
2027 
2028     if (addr + sizeof(val) > vdev->config_len) {
2029         return (uint32_t)-1;
2030     }
2031 
2032     k->get_config(vdev, vdev->config);
2033 
2034     val = ldub_p(vdev->config + addr);
2035     return val;
2036 }
2037 
2038 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2039 {
2040     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2041     uint16_t val;
2042 
2043     if (addr + sizeof(val) > vdev->config_len) {
2044         return (uint32_t)-1;
2045     }
2046 
2047     k->get_config(vdev, vdev->config);
2048 
2049     val = lduw_p(vdev->config + addr);
2050     return val;
2051 }
2052 
2053 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2054 {
2055     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2056     uint32_t val;
2057 
2058     if (addr + sizeof(val) > vdev->config_len) {
2059         return (uint32_t)-1;
2060     }
2061 
2062     k->get_config(vdev, vdev->config);
2063 
2064     val = ldl_p(vdev->config + addr);
2065     return val;
2066 }
2067 
2068 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2069 {
2070     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2071     uint8_t val = data;
2072 
2073     if (addr + sizeof(val) > vdev->config_len) {
2074         return;
2075     }
2076 
2077     stb_p(vdev->config + addr, val);
2078 
2079     if (k->set_config) {
2080         k->set_config(vdev, vdev->config);
2081     }
2082 }
2083 
2084 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2085 {
2086     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2087     uint16_t val = data;
2088 
2089     if (addr + sizeof(val) > vdev->config_len) {
2090         return;
2091     }
2092 
2093     stw_p(vdev->config + addr, val);
2094 
2095     if (k->set_config) {
2096         k->set_config(vdev, vdev->config);
2097     }
2098 }
2099 
2100 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2101 {
2102     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2103     uint32_t val = data;
2104 
2105     if (addr + sizeof(val) > vdev->config_len) {
2106         return;
2107     }
2108 
2109     stl_p(vdev->config + addr, val);
2110 
2111     if (k->set_config) {
2112         k->set_config(vdev, vdev->config);
2113     }
2114 }
2115 
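/*
 * The "modern" config accessors below differ from the legacy ones above only
 * in endianness: VIRTIO 1.0 config space is always little-endian, while the
 * legacy accessors use the target's default endianness.
 */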
2116 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2117 {
2118     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2119     uint8_t val;
2120 
2121     if (addr + sizeof(val) > vdev->config_len) {
2122         return (uint32_t)-1;
2123     }
2124 
2125     k->get_config(vdev, vdev->config);
2126 
2127     val = ldub_p(vdev->config + addr);
2128     return val;
2129 }
2130 
2131 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2132 {
2133     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2134     uint16_t val;
2135 
2136     if (addr + sizeof(val) > vdev->config_len) {
2137         return (uint32_t)-1;
2138     }
2139 
2140     k->get_config(vdev, vdev->config);
2141 
2142     val = lduw_le_p(vdev->config + addr);
2143     return val;
2144 }
2145 
2146 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2147 {
2148     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2149     uint32_t val;
2150 
2151     if (addr + sizeof(val) > vdev->config_len) {
2152         return (uint32_t)-1;
2153     }
2154 
2155     k->get_config(vdev, vdev->config);
2156 
2157     val = ldl_le_p(vdev->config + addr);
2158     return val;
2159 }
2160 
2161 void virtio_config_modern_writeb(VirtIODevice *vdev,
2162                                  uint32_t addr, uint32_t data)
2163 {
2164     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2165     uint8_t val = data;
2166 
2167     if (addr + sizeof(val) > vdev->config_len) {
2168         return;
2169     }
2170 
2171     stb_p(vdev->config + addr, val);
2172 
2173     if (k->set_config) {
2174         k->set_config(vdev, vdev->config);
2175     }
2176 }
2177 
2178 void virtio_config_modern_writew(VirtIODevice *vdev,
2179                                  uint32_t addr, uint32_t data)
2180 {
2181     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2182     uint16_t val = data;
2183 
2184     if (addr + sizeof(val) > vdev->config_len) {
2185         return;
2186     }
2187 
2188     stw_le_p(vdev->config + addr, val);
2189 
2190     if (k->set_config) {
2191         k->set_config(vdev, vdev->config);
2192     }
2193 }
2194 
2195 void virtio_config_modern_writel(VirtIODevice *vdev,
2196                                  uint32_t addr, uint32_t data)
2197 {
2198     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2199     uint32_t val = data;
2200 
2201     if (addr + sizeof(val) > vdev->config_len) {
2202         return;
2203     }
2204 
2205     stl_le_p(vdev->config + addr, val);
2206 
2207     if (k->set_config) {
2208         k->set_config(vdev, vdev->config);
2209     }
2210 }
2211 
2212 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2213 {
2214     if (!vdev->vq[n].vring.num) {
2215         return;
2216     }
2217     vdev->vq[n].vring.desc = addr;
2218     virtio_queue_update_rings(vdev, n);
2219 }
2220 
2221 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2222 {
2223     return vdev->vq[n].vring.desc;
2224 }
2225 
2226 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2227                             hwaddr avail, hwaddr used)
2228 {
2229     if (!vdev->vq[n].vring.num) {
2230         return;
2231     }
2232     vdev->vq[n].vring.desc = desc;
2233     vdev->vq[n].vring.avail = avail;
2234     vdev->vq[n].vring.used = used;
2235     virtio_init_region_cache(vdev, n);
2236 }
2237 
2238 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2239 {
2240     /* Don't allow guest to flip queue between existent and
2241      * nonexistent states, or to set it to an invalid size.
2242      */
2243     if (!!num != !!vdev->vq[n].vring.num ||
2244         num > VIRTQUEUE_MAX_SIZE ||
2245         num < 0) {
2246         return;
2247     }
2248     vdev->vq[n].vring.num = num;
2249 }
2250 
2251 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2252 {
2253     return QLIST_FIRST(&vdev->vector_queues[vector]);
2254 }
2255 
2256 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2257 {
2258     return QLIST_NEXT(vq, node);
2259 }
2260 
2261 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2262 {
2263     return vdev->vq[n].vring.num;
2264 }
2265 
2266 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2267 {
2268     return vdev->vq[n].vring.num_default;
2269 }
2270 
2271 int virtio_get_num_queues(VirtIODevice *vdev)
2272 {
2273     int i;
2274 
2275     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2276         if (!virtio_queue_get_num(vdev, i)) {
2277             break;
2278         }
2279     }
2280 
2281     return i;
2282 }
2283 
2284 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2285 {
2286     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2287     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2288 
2289     /* virtio-1 compliant devices cannot change the alignment */
2290     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2291         error_report("tried to modify queue alignment for virtio-1 device");
2292         return;
2293     }
2294     /* Check that the transport told us it was going to do this
2295      * (so a buggy transport will immediately assert rather than
2296      * silently failing to migrate this state)
2297      */
2298     assert(k->has_variable_vring_alignment);
2299 
2300     if (align) {
2301         vdev->vq[n].vring.align = align;
2302         virtio_queue_update_rings(vdev, n);
2303     }
2304 }
2305 
2306 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2307 {
2308     bool ret = false;
2309 
2310     if (vq->vring.desc && vq->handle_aio_output) {
2311         VirtIODevice *vdev = vq->vdev;
2312 
2313         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2314         ret = vq->handle_aio_output(vdev, vq);
2315 
2316         if (unlikely(vdev->start_on_kick)) {
2317             virtio_set_started(vdev, true);
2318         }
2319     }
2320 
2321     return ret;
2322 }
2323 
2324 static void virtio_queue_notify_vq(VirtQueue *vq)
2325 {
2326     if (vq->vring.desc && vq->handle_output) {
2327         VirtIODevice *vdev = vq->vdev;
2328 
2329         if (unlikely(vdev->broken)) {
2330             return;
2331         }
2332 
2333         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2334         vq->handle_output(vdev, vq);
2335 
2336         if (unlikely(vdev->start_on_kick)) {
2337             virtio_set_started(vdev, true);
2338         }
2339     }
2340 }
2341 
2342 void virtio_queue_notify(VirtIODevice *vdev, int n)
2343 {
2344     VirtQueue *vq = &vdev->vq[n];
2345 
2346     if (unlikely(!vq->vring.desc || vdev->broken)) {
2347         return;
2348     }
2349 
2350     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
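    /*
     * When ioeventfd is in use, forward the kick through the host notifier
     * so the handler runs in its own event loop; otherwise call the output
     * handler directly.
     */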
2351     if (vq->host_notifier_enabled) {
2352         event_notifier_set(&vq->host_notifier);
2353     } else if (vq->handle_output) {
2354         vq->handle_output(vdev, vq);
2355 
2356         if (unlikely(vdev->start_on_kick)) {
2357             virtio_set_started(vdev, true);
2358         }
2359     }
2360 }
2361 
2362 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2363 {
2364     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2365         VIRTIO_NO_VECTOR;
2366 }
2367 
2368 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2369 {
2370     VirtQueue *vq = &vdev->vq[n];
2371 
2372     if (n < VIRTIO_QUEUE_MAX) {
2373         if (vdev->vector_queues &&
2374             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2375             QLIST_REMOVE(vq, node);
2376         }
2377         vdev->vq[n].vector = vector;
2378         if (vdev->vector_queues &&
2379             vector != VIRTIO_NO_VECTOR) {
2380             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2381         }
2382     }
2383 }
2384 
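/*
 * Illustrative usage (not taken from this file): a device's realize handler
 * typically calls virtio_init() and then virtio_add_queue() once per ring,
 * e.g. "vq = virtio_add_queue(vdev, 256, handle_output_fn);", where
 * handle_output_fn stands for the device's own kick handler.  Queues are
 * torn down again with virtio_delete_queue() or virtio_del_queue().
 */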
2385 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2386                             VirtIOHandleOutput handle_output)
2387 {
2388     int i;
2389 
2390     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2391         if (vdev->vq[i].vring.num == 0)
2392             break;
2393     }
2394 
2395     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2396         abort();
2397 
2398     vdev->vq[i].vring.num = queue_size;
2399     vdev->vq[i].vring.num_default = queue_size;
2400     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2401     vdev->vq[i].handle_output = handle_output;
2402     vdev->vq[i].handle_aio_output = NULL;
2403     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2404                                        queue_size);
2405 
2406     return &vdev->vq[i];
2407 }
2408 
2409 void virtio_delete_queue(VirtQueue *vq)
2410 {
2411     vq->vring.num = 0;
2412     vq->vring.num_default = 0;
2413     vq->handle_output = NULL;
2414     vq->handle_aio_output = NULL;
2415     g_free(vq->used_elems);
2416     vq->used_elems = NULL;
2417     virtio_virtqueue_reset_region_cache(vq);
2418 }
2419 
2420 void virtio_del_queue(VirtIODevice *vdev, int n)
2421 {
2422     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2423         abort();
2424     }
2425 
2426     virtio_delete_queue(&vdev->vq[n]);
2427 }
2428 
2429 static void virtio_set_isr(VirtIODevice *vdev, int value)
2430 {
2431     uint8_t old = qatomic_read(&vdev->isr);
2432 
2433     /* Do not write ISR if it does not change, so that its cacheline remains
2434      * shared in the common case where the guest does not read it.
2435      */
2436     if ((old & value) != value) {
2437         qatomic_or(&vdev->isr, value);
2438     }
2439 }
2440 
2441 /* Called within rcu_read_lock(). */
2442 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2443 {
2444     uint16_t old, new;
2445     bool v;
2446     /* We need to expose used array entries before checking used event. */
2447     smp_mb();
2448     /* Always notify when the queue is empty (if the feature was negotiated) */
2449     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2450         !vq->inuse && virtio_queue_empty(vq)) {
2451         return true;
2452     }
2453 
2454     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2455         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2456     }
2457 
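    /*
     * With EVENT_IDX, notify only if used_idx has passed the used_event
     * index published by the guest, i.e. the event index falls within the
     * (old, new] window checked by vring_need_event().
     */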
2458     v = vq->signalled_used_valid;
2459     vq->signalled_used_valid = true;
2460     old = vq->signalled_used;
2461     new = vq->signalled_used = vq->used_idx;
2462     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2463 }
2464 
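/*
 * For packed rings the driver publishes the event index as off_wrap:
 * bits 0-14 hold the descriptor offset and bit 15 the expected wrap
 * counter.  If that wrap counter differs from the device's, the offset
 * refers to the previous pass over the ring, hence the subtraction of
 * vring.num below.
 */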
2465 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2466                                     uint16_t off_wrap, uint16_t new,
2467                                     uint16_t old)
2468 {
2469     int off = off_wrap & ~(1 << 15);
2470 
2471     if (wrap != off_wrap >> 15) {
2472         off -= vq->vring.num;
2473     }
2474 
2475     return vring_need_event(off, new, old);
2476 }
2477 
2478 /* Called within rcu_read_lock(). */
2479 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2480 {
2481     VRingPackedDescEvent e;
2482     uint16_t old, new;
2483     bool v;
2484     VRingMemoryRegionCaches *caches;
2485 
2486     caches = vring_get_region_caches(vq);
2487     if (!caches) {
2488         return false;
2489     }
2490 
2491     vring_packed_event_read(vdev, &caches->avail, &e);
2492 
2493     old = vq->signalled_used;
2494     new = vq->signalled_used = vq->used_idx;
2495     v = vq->signalled_used_valid;
2496     vq->signalled_used_valid = true;
2497 
2498     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2499         return false;
2500     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2501         return true;
2502     }
2503 
2504     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2505                                          e.off_wrap, new, old);
2506 }
2507 
2508 /* Called within rcu_read_lock().  */
2509 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2510 {
2511     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2512         return virtio_packed_should_notify(vdev, vq);
2513     } else {
2514         return virtio_split_should_notify(vdev, vq);
2515     }
2516 }
2517 
2518 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2519 {
2520     WITH_RCU_READ_LOCK_GUARD() {
2521         if (!virtio_should_notify(vdev, vq)) {
2522             return;
2523         }
2524     }
2525 
2526     trace_virtio_notify_irqfd(vdev, vq);
2527 
2528     /*
2529      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2530      * Windows drivers included in virtio-win 1.8.0 (circa 2015) were
2531      * incorrectly polling this bit during crashdump and hibernation
2532      * in MSI mode, causing a hang if this bit is never updated.
2533      * Recent releases of Windows do not really shut down, but rather
2534      * log out and hibernate to make the next startup faster.  Hence,
2535      * this manifested as a more serious hang during shutdown with
2536      *
2537      * Next driver release from 2016 fixed this problem, so working around it
2538      * is not a must, but it's easy to do so let's do it here.
2539      *
2540      * Note: it's safe to update ISR from any thread as it was switched
2541      * to an atomic operation.
2542      */
2543     virtio_set_isr(vq->vdev, 0x1);
2544     event_notifier_set(&vq->guest_notifier);
2545 }
2546 
2547 static void virtio_irq(VirtQueue *vq)
2548 {
2549     virtio_set_isr(vq->vdev, 0x1);
2550     virtio_notify_vector(vq->vdev, vq->vector);
2551 }
2552 
2553 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2554 {
2555     WITH_RCU_READ_LOCK_GUARD() {
2556         if (!virtio_should_notify(vdev, vq)) {
2557             return;
2558         }
2559     }
2560 
2561     trace_virtio_notify(vdev, vq);
2562     virtio_irq(vq);
2563 }
2564 
2565 void virtio_notify_config(VirtIODevice *vdev)
2566 {
2567     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2568         return;
2569 
2570     virtio_set_isr(vdev, 0x3);
2571     vdev->generation++;
2572     virtio_notify_vector(vdev, vdev->config_vector);
2573 }
2574 
2575 static bool virtio_device_endian_needed(void *opaque)
2576 {
2577     VirtIODevice *vdev = opaque;
2578 
2579     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2580     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2581         return vdev->device_endian != virtio_default_endian();
2582     }
2583     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2584     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2585 }
2586 
2587 static bool virtio_64bit_features_needed(void *opaque)
2588 {
2589     VirtIODevice *vdev = opaque;
2590 
2591     return (vdev->host_features >> 32) != 0;
2592 }
2593 
2594 static bool virtio_virtqueue_needed(void *opaque)
2595 {
2596     VirtIODevice *vdev = opaque;
2597 
2598     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2599 }
2600 
2601 static bool virtio_packed_virtqueue_needed(void *opaque)
2602 {
2603     VirtIODevice *vdev = opaque;
2604 
2605     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2606 }
2607 
2608 static bool virtio_ringsize_needed(void *opaque)
2609 {
2610     VirtIODevice *vdev = opaque;
2611     int i;
2612 
2613     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2614         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2615             return true;
2616         }
2617     }
2618     return false;
2619 }
2620 
2621 static bool virtio_extra_state_needed(void *opaque)
2622 {
2623     VirtIODevice *vdev = opaque;
2624     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2625     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2626 
2627     return k->has_extra_state &&
2628         k->has_extra_state(qbus->parent);
2629 }
2630 
2631 static bool virtio_broken_needed(void *opaque)
2632 {
2633     VirtIODevice *vdev = opaque;
2634 
2635     return vdev->broken;
2636 }
2637 
2638 static bool virtio_started_needed(void *opaque)
2639 {
2640     VirtIODevice *vdev = opaque;
2641 
2642     return vdev->started;
2643 }
2644 
2645 static bool virtio_disabled_needed(void *opaque)
2646 {
2647     VirtIODevice *vdev = opaque;
2648 
2649     return vdev->disabled;
2650 }
2651 
2652 static const VMStateDescription vmstate_virtqueue = {
2653     .name = "virtqueue_state",
2654     .version_id = 1,
2655     .minimum_version_id = 1,
2656     .fields = (VMStateField[]) {
2657         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2658         VMSTATE_UINT64(vring.used, struct VirtQueue),
2659         VMSTATE_END_OF_LIST()
2660     }
2661 };
2662 
2663 static const VMStateDescription vmstate_packed_virtqueue = {
2664     .name = "packed_virtqueue_state",
2665     .version_id = 1,
2666     .minimum_version_id = 1,
2667     .fields = (VMStateField[]) {
2668         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2669         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2670         VMSTATE_UINT16(used_idx, struct VirtQueue),
2671         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2672         VMSTATE_UINT32(inuse, struct VirtQueue),
2673         VMSTATE_END_OF_LIST()
2674     }
2675 };
2676 
2677 static const VMStateDescription vmstate_virtio_virtqueues = {
2678     .name = "virtio/virtqueues",
2679     .version_id = 1,
2680     .minimum_version_id = 1,
2681     .needed = &virtio_virtqueue_needed,
2682     .fields = (VMStateField[]) {
2683         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2684                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2685         VMSTATE_END_OF_LIST()
2686     }
2687 };
2688 
2689 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2690     .name = "virtio/packed_virtqueues",
2691     .version_id = 1,
2692     .minimum_version_id = 1,
2693     .needed = &virtio_packed_virtqueue_needed,
2694     .fields = (VMStateField[]) {
2695         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2696                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2697         VMSTATE_END_OF_LIST()
2698     }
2699 };
2700 
2701 static const VMStateDescription vmstate_ringsize = {
2702     .name = "ringsize_state",
2703     .version_id = 1,
2704     .minimum_version_id = 1,
2705     .fields = (VMStateField[]) {
2706         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2707         VMSTATE_END_OF_LIST()
2708     }
2709 };
2710 
2711 static const VMStateDescription vmstate_virtio_ringsize = {
2712     .name = "virtio/ringsize",
2713     .version_id = 1,
2714     .minimum_version_id = 1,
2715     .needed = &virtio_ringsize_needed,
2716     .fields = (VMStateField[]) {
2717         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2718                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2719         VMSTATE_END_OF_LIST()
2720     }
2721 };
2722 
2723 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2724                            const VMStateField *field)
2725 {
2726     VirtIODevice *vdev = pv;
2727     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2728     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2729 
2730     if (!k->load_extra_state) {
2731         return -1;
2732     } else {
2733         return k->load_extra_state(qbus->parent, f);
2734     }
2735 }
2736 
2737 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2738                            const VMStateField *field, JSONWriter *vmdesc)
2739 {
2740     VirtIODevice *vdev = pv;
2741     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2742     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2743 
2744     k->save_extra_state(qbus->parent, f);
2745     return 0;
2746 }
2747 
2748 static const VMStateInfo vmstate_info_extra_state = {
2749     .name = "virtqueue_extra_state",
2750     .get = get_extra_state,
2751     .put = put_extra_state,
2752 };
2753 
2754 static const VMStateDescription vmstate_virtio_extra_state = {
2755     .name = "virtio/extra_state",
2756     .version_id = 1,
2757     .minimum_version_id = 1,
2758     .needed = &virtio_extra_state_needed,
2759     .fields = (VMStateField[]) {
2760         {
2761             .name         = "extra_state",
2762             .version_id   = 0,
2763             .field_exists = NULL,
2764             .size         = 0,
2765             .info         = &vmstate_info_extra_state,
2766             .flags        = VMS_SINGLE,
2767             .offset       = 0,
2768         },
2769         VMSTATE_END_OF_LIST()
2770     }
2771 };
2772 
2773 static const VMStateDescription vmstate_virtio_device_endian = {
2774     .name = "virtio/device_endian",
2775     .version_id = 1,
2776     .minimum_version_id = 1,
2777     .needed = &virtio_device_endian_needed,
2778     .fields = (VMStateField[]) {
2779         VMSTATE_UINT8(device_endian, VirtIODevice),
2780         VMSTATE_END_OF_LIST()
2781     }
2782 };
2783 
2784 static const VMStateDescription vmstate_virtio_64bit_features = {
2785     .name = "virtio/64bit_features",
2786     .version_id = 1,
2787     .minimum_version_id = 1,
2788     .needed = &virtio_64bit_features_needed,
2789     .fields = (VMStateField[]) {
2790         VMSTATE_UINT64(guest_features, VirtIODevice),
2791         VMSTATE_END_OF_LIST()
2792     }
2793 };
2794 
2795 static const VMStateDescription vmstate_virtio_broken = {
2796     .name = "virtio/broken",
2797     .version_id = 1,
2798     .minimum_version_id = 1,
2799     .needed = &virtio_broken_needed,
2800     .fields = (VMStateField[]) {
2801         VMSTATE_BOOL(broken, VirtIODevice),
2802         VMSTATE_END_OF_LIST()
2803     }
2804 };
2805 
2806 static const VMStateDescription vmstate_virtio_started = {
2807     .name = "virtio/started",
2808     .version_id = 1,
2809     .minimum_version_id = 1,
2810     .needed = &virtio_started_needed,
2811     .fields = (VMStateField[]) {
2812         VMSTATE_BOOL(started, VirtIODevice),
2813         VMSTATE_END_OF_LIST()
2814     }
2815 };
2816 
2817 static const VMStateDescription vmstate_virtio_disabled = {
2818     .name = "virtio/disabled",
2819     .version_id = 1,
2820     .minimum_version_id = 1,
2821     .needed = &virtio_disabled_needed,
2822     .fields = (VMStateField[]) {
2823         VMSTATE_BOOL(disabled, VirtIODevice),
2824         VMSTATE_END_OF_LIST()
2825     }
2826 };
2827 
2828 static const VMStateDescription vmstate_virtio = {
2829     .name = "virtio",
2830     .version_id = 1,
2831     .minimum_version_id = 1,
2832     .minimum_version_id_old = 1,
2833     .fields = (VMStateField[]) {
2834         VMSTATE_END_OF_LIST()
2835     },
2836     .subsections = (const VMStateDescription*[]) {
2837         &vmstate_virtio_device_endian,
2838         &vmstate_virtio_64bit_features,
2839         &vmstate_virtio_virtqueues,
2840         &vmstate_virtio_ringsize,
2841         &vmstate_virtio_broken,
2842         &vmstate_virtio_extra_state,
2843         &vmstate_virtio_started,
2844         &vmstate_virtio_packed_virtqueues,
2845         &vmstate_virtio_disabled,
2846         NULL
2847     }
2848 };
2849 
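/*
 * Legacy save format, kept for compatibility: transport config, then status,
 * ISR, queue_sel, the low 32 feature bits, config space, the number of
 * in-use virtqueues, and per-queue state (size, optional alignment, desc
 * address, last_avail_idx, transport queue state).  Everything newer is
 * emitted as vmstate subsections at the end.
 */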
2850 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2851 {
2852     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2853     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2854     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2855     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2856     int i;
2857 
2858     if (k->save_config) {
2859         k->save_config(qbus->parent, f);
2860     }
2861 
2862     qemu_put_8s(f, &vdev->status);
2863     qemu_put_8s(f, &vdev->isr);
2864     qemu_put_be16s(f, &vdev->queue_sel);
2865     qemu_put_be32s(f, &guest_features_lo);
2866     qemu_put_be32(f, vdev->config_len);
2867     qemu_put_buffer(f, vdev->config, vdev->config_len);
2868 
2869     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2870         if (vdev->vq[i].vring.num == 0)
2871             break;
2872     }
2873 
2874     qemu_put_be32(f, i);
2875 
2876     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2877         if (vdev->vq[i].vring.num == 0)
2878             break;
2879 
2880         qemu_put_be32(f, vdev->vq[i].vring.num);
2881         if (k->has_variable_vring_alignment) {
2882             qemu_put_be32(f, vdev->vq[i].vring.align);
2883         }
2884         /*
2885          * Save desc now, the rest of the ring addresses are saved in
2886          * subsections for VIRTIO-1 devices.
2887          */
2888         qemu_put_be64(f, vdev->vq[i].vring.desc);
2889         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2890         if (k->save_queue) {
2891             k->save_queue(qbus->parent, i, f);
2892         }
2893     }
2894 
2895     if (vdc->save != NULL) {
2896         vdc->save(vdev, f);
2897     }
2898 
2899     if (vdc->vmsd) {
2900         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2901         if (ret) {
2902             return ret;
2903         }
2904     }
2905 
2906     /* Subsections */
2907     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2908 }
2909 
2910 /* A wrapper for use as a VMState .put function */
2911 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2912                               const VMStateField *field, JSONWriter *vmdesc)
2913 {
2914     return virtio_save(VIRTIO_DEVICE(opaque), f);
2915 }
2916 
2917 /* A wrapper for use as a VMState .get function */
2918 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2919                              const VMStateField *field)
2920 {
2921     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2922     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2923 
2924     return virtio_load(vdev, f, dc->vmsd->version_id);
2925 }
2926 
2927 const VMStateInfo virtio_vmstate_info = {
2928     .name = "virtio",
2929     .get = virtio_device_get,
2930     .put = virtio_device_put,
2931 };
2932 
2933 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2934 {
2935     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2936     bool bad = (val & ~(vdev->host_features)) != 0;
2937 
2938     val &= vdev->host_features;
2939     if (k->set_features) {
2940         k->set_features(vdev, val);
2941     }
2942     vdev->guest_features = val;
2943     return bad ? -1 : 0;
2944 }
2945 
2946 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2947 {
2948     int ret;
2949     /*
2950      * The driver must not attempt to set features after feature negotiation
2951      * has finished.
2952      */
2953     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2954         return -EINVAL;
2955     }
2956     ret = virtio_set_features_nocheck(vdev, val);
2957     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2958         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2959         int i;
2960         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2961             if (vdev->vq[i].vring.num != 0) {
2962                 virtio_init_region_cache(vdev, i);
2963             }
2964         }
2965     }
2966     if (!ret) {
2967         if (!virtio_device_started(vdev, vdev->status) &&
2968             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2969             vdev->start_on_kick = true;
2970         }
2971     }
2972     return ret;
2973 }
2974 
2975 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2976                                       uint64_t host_features)
2977 {
2978     size_t config_size = 0;
2979     int i;
2980 
2981     for (i = 0; feature_sizes[i].flags != 0; i++) {
2982         if (host_features & feature_sizes[i].flags) {
2983             config_size = MAX(feature_sizes[i].end, config_size);
2984         }
2985     }
2986 
2987     return config_size;
2988 }
2989 
2990 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2991 {
2992     int i, ret;
2993     int32_t config_len;
2994     uint32_t num;
2995     uint32_t features;
2996     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2997     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2998     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2999 
3000     /*
3001      * We poison the endianness to ensure it does not get used before
3002      * subsections have been loaded.
3003      */
3004     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3005 
3006     if (k->load_config) {
3007         ret = k->load_config(qbus->parent, f);
3008         if (ret)
3009             return ret;
3010     }
3011 
3012     qemu_get_8s(f, &vdev->status);
3013     qemu_get_8s(f, &vdev->isr);
3014     qemu_get_be16s(f, &vdev->queue_sel);
3015     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3016         return -1;
3017     }
3018     qemu_get_be32s(f, &features);
3019 
3020     /*
3021      * Temporarily set guest_features low bits - needed by
3022      * the virtio-net load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3023      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3024      *
3025      * Note: devices should always test host features in the future - don't
3026      * create new dependencies like this.
3027      */
3028     vdev->guest_features = features;
3029 
3030     config_len = qemu_get_be32(f);
3031 
3032     /*
3033      * There are cases where the incoming config can be bigger or smaller
3034      * than what we have; so load what we have space for, and skip
3035      * any excess that's in the stream.
3036      */
3037     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3038 
3039     while (config_len > vdev->config_len) {
3040         qemu_get_byte(f);
3041         config_len--;
3042     }
3043 
3044     num = qemu_get_be32(f);
3045 
3046     if (num > VIRTIO_QUEUE_MAX) {
3047         error_report("Invalid number of virtqueues: 0x%x", num);
3048         return -1;
3049     }
3050 
3051     for (i = 0; i < num; i++) {
3052         vdev->vq[i].vring.num = qemu_get_be32(f);
3053         if (k->has_variable_vring_alignment) {
3054             vdev->vq[i].vring.align = qemu_get_be32(f);
3055         }
3056         vdev->vq[i].vring.desc = qemu_get_be64(f);
3057         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3058         vdev->vq[i].signalled_used_valid = false;
3059         vdev->vq[i].notification = true;
3060 
3061         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3062             error_report("VQ %d address 0x0 "
3063                          "inconsistent with Host index 0x%x",
3064                          i, vdev->vq[i].last_avail_idx);
3065             return -1;
3066         }
3067         if (k->load_queue) {
3068             ret = k->load_queue(qbus->parent, i, f);
3069             if (ret)
3070                 return ret;
3071         }
3072     }
3073 
3074     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3075 
3076     if (vdc->load != NULL) {
3077         ret = vdc->load(vdev, f, version_id);
3078         if (ret) {
3079             return ret;
3080         }
3081     }
3082 
3083     if (vdc->vmsd) {
3084         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3085         if (ret) {
3086             return ret;
3087         }
3088     }
3089 
3090     /* Subsections */
3091     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3092     if (ret) {
3093         return ret;
3094     }
3095 
3096     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3097         vdev->device_endian = virtio_default_endian();
3098     }
3099 
3100     if (virtio_64bit_features_needed(vdev)) {
3101         /*
3102          * Subsection load filled vdev->guest_features.  Run them
3103          * through virtio_set_features_nocheck() to sanity-check them against
3104          * host_features.
3105          */
3106         uint64_t features64 = vdev->guest_features;
3107         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3108             error_report("Features 0x%" PRIx64 " unsupported. "
3109                          "Allowed features: 0x%" PRIx64,
3110                          features64, vdev->host_features);
3111             return -1;
3112         }
3113     } else {
3114         if (virtio_set_features_nocheck(vdev, features) < 0) {
3115             error_report("Features 0x%x unsupported. "
3116                          "Allowed features: 0x%" PRIx64,
3117                          features, vdev->host_features);
3118             return -1;
3119         }
3120     }
3121 
3122     if (!virtio_device_started(vdev, vdev->status) &&
3123         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3124         vdev->start_on_kick = true;
3125     }
3126 
3127     RCU_READ_LOCK_GUARD();
3128     for (i = 0; i < num; i++) {
3129         if (vdev->vq[i].vring.desc) {
3130             uint16_t nheads;
3131 
3132             /*
3133              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3134              * only the region cache needs to be set up.  Legacy devices need
3135              * to calculate used and avail ring addresses based on the desc
3136              * address.
3137              */
3138             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3139                 virtio_init_region_cache(vdev, i);
3140             } else {
3141                 virtio_queue_update_rings(vdev, i);
3142             }
3143 
3144             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3145                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3146                 vdev->vq[i].shadow_avail_wrap_counter =
3147                                         vdev->vq[i].last_avail_wrap_counter;
3148                 continue;
3149             }
3150 
3151             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3152             /* Check it isn't doing strange things with descriptor numbers. */
3153             if (nheads > vdev->vq[i].vring.num) {
3154                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3155                              "inconsistent with Host index 0x%x: delta 0x%x",
3156                              i, vdev->vq[i].vring.num,
3157                              vring_avail_idx(&vdev->vq[i]),
3158                              vdev->vq[i].last_avail_idx, nheads);
3159                 vdev->vq[i].used_idx = 0;
3160                 vdev->vq[i].shadow_avail_idx = 0;
3161                 vdev->vq[i].inuse = 0;
3162                 continue;
3163             }
3164             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3165             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3166 
3167             /*
3168              * Some devices migrate VirtQueueElements that have been popped
3169              * from the avail ring but not yet returned to the used ring.
3170              * Since max ring size < UINT16_MAX it's safe to use modulo
3171              * UINT16_MAX + 1 subtraction.
3172              */
3173             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3174                                 vdev->vq[i].used_idx);
3175             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3176                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3177                              "used_idx 0x%x",
3178                              i, vdev->vq[i].vring.num,
3179                              vdev->vq[i].last_avail_idx,
3180                              vdev->vq[i].used_idx);
3181                 return -1;
3182             }
3183         }
3184     }
3185 
3186     if (vdc->post_load) {
3187         ret = vdc->post_load(vdev);
3188         if (ret) {
3189             return ret;
3190         }
3191     }
3192 
3193     return 0;
3194 }
3195 
3196 void virtio_cleanup(VirtIODevice *vdev)
3197 {
3198     qemu_del_vm_change_state_handler(vdev->vmstate);
3199 }
3200 
3201 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3202 {
3203     VirtIODevice *vdev = opaque;
3204     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3205     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3206     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3207     vdev->vm_running = running;
3208 
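    /*
     * Keep the ordering symmetric: propagate the status before telling the
     * transport the backend is starting, but only after telling it the
     * backend has stopped.
     */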
3209     if (backend_run) {
3210         virtio_set_status(vdev, vdev->status);
3211     }
3212 
3213     if (k->vmstate_change) {
3214         k->vmstate_change(qbus->parent, backend_run);
3215     }
3216 
3217     if (!backend_run) {
3218         virtio_set_status(vdev, vdev->status);
3219     }
3220 }
3221 
3222 void virtio_instance_init_common(Object *proxy_obj, void *data,
3223                                  size_t vdev_size, const char *vdev_name)
3224 {
3225     DeviceState *vdev = data;
3226 
3227     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3228                                        vdev_size, vdev_name, &error_abort,
3229                                        NULL);
3230     qdev_alias_all_properties(vdev, proxy_obj);
3231 }
3232 
3233 void virtio_init(VirtIODevice *vdev, const char *name,
3234                  uint16_t device_id, size_t config_size)
3235 {
3236     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3237     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3238     int i;
3239     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3240 
3241     if (nvectors) {
3242         vdev->vector_queues =
3243             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3244     }
3245 
3246     vdev->start_on_kick = false;
3247     vdev->started = false;
3248     vdev->device_id = device_id;
3249     vdev->status = 0;
3250     qatomic_set(&vdev->isr, 0);
3251     vdev->queue_sel = 0;
3252     vdev->config_vector = VIRTIO_NO_VECTOR;
3253     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3254     vdev->vm_running = runstate_is_running();
3255     vdev->broken = false;
3256     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3257         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3258         vdev->vq[i].vdev = vdev;
3259         vdev->vq[i].queue_index = i;
3260         vdev->vq[i].host_notifier_enabled = false;
3261     }
3262 
3263     vdev->name = name;
3264     vdev->config_len = config_size;
3265     if (vdev->config_len) {
3266         vdev->config = g_malloc0(config_size);
3267     } else {
3268         vdev->config = NULL;
3269     }
3270     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3271             virtio_vmstate_change, vdev);
3272     vdev->device_endian = virtio_default_endian();
3273     vdev->use_guest_notifier_mask = true;
3274 }
3275 
3276 /*
3277  * Only devices that have already been around prior to defining the virtio
3278  * standard support legacy mode; this includes devices not specified in the
3279  * standard. All newer devices conform to the virtio standard only.
3280  */
3281 bool virtio_legacy_allowed(VirtIODevice *vdev)
3282 {
3283     switch (vdev->device_id) {
3284     case VIRTIO_ID_NET:
3285     case VIRTIO_ID_BLOCK:
3286     case VIRTIO_ID_CONSOLE:
3287     case VIRTIO_ID_RNG:
3288     case VIRTIO_ID_BALLOON:
3289     case VIRTIO_ID_RPMSG:
3290     case VIRTIO_ID_SCSI:
3291     case VIRTIO_ID_9P:
3292     case VIRTIO_ID_RPROC_SERIAL:
3293     case VIRTIO_ID_CAIF:
3294         return true;
3295     default:
3296         return false;
3297     }
3298 }
3299 
3300 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3301 {
3302     return vdev->disable_legacy_check;
3303 }
3304 
3305 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3306 {
3307     return vdev->vq[n].vring.desc;
3308 }
3309 
3310 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3311 {
3312     return virtio_queue_get_desc_addr(vdev, n) != 0;
3313 }
3314 
3315 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3316 {
3317     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3318     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3319 
3320     if (k->queue_enabled) {
3321         return k->queue_enabled(qbus->parent, n);
3322     }
3323     return virtio_queue_enabled_legacy(vdev, n);
3324 }
3325 
3326 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3327 {
3328     return vdev->vq[n].vring.avail;
3329 }
3330 
3331 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3332 {
3333     return vdev->vq[n].vring.used;
3334 }
3335 
3336 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3337 {
3338     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3339 }
3340 
3341 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3342 {
3343     int s;
3344 
3345     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3346         return sizeof(struct VRingPackedDescEvent);
3347     }
3348 
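    /* The extra 2 bytes hold used_event when VIRTIO_RING_F_EVENT_IDX is set. */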
3349     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3350     return offsetof(VRingAvail, ring) +
3351         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3352 }
3353 
3354 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3355 {
3356     int s;
3357 
3358     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3359         return sizeof(struct VRingPackedDescEvent);
3360     }
3361 
3362     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3363     return offsetof(VRingUsed, ring) +
3364         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3365 }
3366 
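/*
 * For packed rings the "last avail" state is reported as a 32-bit value:
 * bits 0-14 last_avail_idx, bit 15 its wrap counter, bits 16-30 used_idx
 * and bit 31 the used wrap counter (mirrored by the setter below).
 */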
3367 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3368                                                            int n)
3369 {
3370     unsigned int avail, used;
3371 
3372     avail = vdev->vq[n].last_avail_idx;
3373     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3374 
3375     used = vdev->vq[n].used_idx;
3376     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3377 
3378     return avail | used << 16;
3379 }
3380 
3381 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3382                                                       int n)
3383 {
3384     return vdev->vq[n].last_avail_idx;
3385 }
3386 
3387 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3388 {
3389     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3390         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3391     } else {
3392         return virtio_queue_split_get_last_avail_idx(vdev, n);
3393     }
3394 }
3395 
3396 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3397                                                    int n, unsigned int idx)
3398 {
3399     struct VirtQueue *vq = &vdev->vq[n];
3400 
3401     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3402     vq->last_avail_wrap_counter =
3403         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3404     idx >>= 16;
3405     vq->used_idx = idx & 0x7fff;
3406     vq->used_wrap_counter = !!(idx & 0x8000);
3407 }
3408 
3409 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3410                                                   int n, unsigned int idx)
3411 {
3412     vdev->vq[n].last_avail_idx = idx;
3413     vdev->vq[n].shadow_avail_idx = idx;
3414 }
3415 
3416 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3417                                      unsigned int idx)
3418 {
3419     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3420         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3421     } else {
3422         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3423     }
3424 }
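/*
 * A minimal sketch of how an external backend might use these accessors
 * when taking a queue over and handing it back (the surrounding vhost-style
 * flow is illustrative only, not taken from this file):
 *
 *     // before the backend takes over: remember where QEMU stopped
 *     unsigned int idx = virtio_queue_get_last_avail_idx(vdev, n);
 *     // ... backend processes the ring ...
 *     // when QEMU resumes processing: restore the backend's final state
 *     virtio_queue_set_last_avail_idx(vdev, n, idx);
 *     virtio_queue_invalidate_signalled_used(vdev, n);
 *
 * For packed rings the value also carries the wrap counters and used_idx
 * as described above, so the same pair of calls round-trips both layouts.
 */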
3425 
3426 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3427                                                        int n)
3428 {
3429     /* We don't have a reference like avail idx in shared memory */
3430     return;
3431 }
3432 
3433 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3434                                                       int n)
3435 {
3436     RCU_READ_LOCK_GUARD();
3437     if (vdev->vq[n].vring.desc) {
3438         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3439         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3440     }
3441 }
3442 
3443 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3444 {
3445     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3446         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3447     } else {
3448         virtio_queue_split_restore_last_avail_idx(vdev, n);
3449     }
3450 }
3451 
3452 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3453 {
3454     /* used idx was updated through set_last_avail_idx() */
3455     return;
3456 }
3457 
3458 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3459 {
3460     RCU_READ_LOCK_GUARD();
3461     if (vdev->vq[n].vring.desc) {
3462         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3463     }
3464 }
3465 
3466 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3467 {
3468     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3469         return virtio_queue_packed_update_used_idx(vdev, n);
3470     } else {
3471         return virtio_queue_split_update_used_idx(vdev, n);
3472     }
3473 }
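/*
 * virtio_queue_restore_last_avail_idx() and virtio_queue_update_used_idx()
 * re-derive the in-QEMU indices from guest memory (split rings read the
 * used ring's idx field); packed rings keep no such in-memory reference,
 * so the values set via virtio_queue_set_last_avail_idx() are used as-is.
 * They are typically called when an external backend such as vhost stops
 * and the device model has to pick up processing where the backend left
 * off.
 */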
3474 
3475 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3476 {
3477     vdev->vq[n].signalled_used_valid = false;
3478 }
3479 
3480 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3481 {
3482     return vdev->vq + n;
3483 }
3484 
3485 uint16_t virtio_get_queue_index(VirtQueue *vq)
3486 {
3487     return vq->queue_index;
3488 }
3489 
3490 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3491 {
3492     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3493     if (event_notifier_test_and_clear(n)) {
3494         virtio_irq(vq);
3495     }
3496 }
3497 static void virtio_config_guest_notifier_read(EventNotifier *n)
3498 {
3499     VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3500 
3501     if (event_notifier_test_and_clear(n)) {
3502         virtio_notify_config(vdev);
3503     }
3504 }
3505 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3506                                                 bool with_irqfd)
3507 {
3508     if (assign && !with_irqfd) {
3509         event_notifier_set_handler(&vq->guest_notifier,
3510                                    virtio_queue_guest_notifier_read);
3511     } else {
3512         event_notifier_set_handler(&vq->guest_notifier, NULL);
3513     }
3514     if (!assign) {
3515         /* Test and clear notifier before closing it,
3516          * in case poll callback didn't have time to run. */
3517         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3518     }
3519 }
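/*
 * With irqfd the guest notifier fd is consumed directly by the hypervisor,
 * so no handler is installed in QEMU; without irqfd the read handler above
 * turns every notifier event into virtio_irq().  A rough fragment of how a
 * transport might toggle this for one queue (the surrounding function and
 * error handling are illustrative, not taken from this file):
 *
 *     EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 *     int r;
 *
 *     if (assign) {
 *         r = event_notifier_init(notifier, 0);
 *         if (r < 0) {
 *             return r;
 *         }
 *     }
 *     virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
 *     if (!assign) {
 *         event_notifier_cleanup(notifier);
 *     }
 */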
3520 
3521 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3522                                                  bool assign, bool with_irqfd)
3523 {
3524     EventNotifier *n;
3525     n = &vdev->config_notifier;
3526     if (assign && !with_irqfd) {
3527         event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3528     } else {
3529         event_notifier_set_handler(n, NULL);
3530     }
3531     if (!assign) {
3532         /* Test and clear notifier before closing it,
3533          * in case poll callback didn't have time to run. */
3534         virtio_config_guest_notifier_read(n);
3535     }
3536 }
3537 
3538 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3539 {
3540     return &vq->guest_notifier;
3541 }
3542 
3543 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3544 {
3545     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3546     if (event_notifier_test_and_clear(n)) {
3547         virtio_queue_notify_aio_vq(vq);
3548     }
3549 }
3550 
3551 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3552 {
3553     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3554 
3555     virtio_queue_set_notification(vq, 0);
3556 }
3557 
3558 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3559 {
3560     EventNotifier *n = opaque;
3561     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3562 
3563     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3564         return false;
3565     }
3566 
3567     return virtio_queue_notify_aio_vq(vq);
3568 }
3569 
3570 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3571 {
3572     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3573 
3574     /* Caller polls once more after this to catch requests that race with us */
3575     virtio_queue_set_notification(vq, 1);
3576 }
3577 
3578 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3579                                                 VirtIOHandleAIOOutput handle_output)
3580 {
3581     if (handle_output) {
3582         vq->handle_aio_output = handle_output;
3583         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3584                                virtio_queue_host_notifier_aio_read,
3585                                virtio_queue_host_notifier_aio_poll);
3586         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3587                                     virtio_queue_host_notifier_aio_poll_begin,
3588                                     virtio_queue_host_notifier_aio_poll_end);
3589     } else {
3590         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3591         /* Test and clear notifier after disabling event,
3592          * in case poll callback didn't have time to run. */
3593         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3594         vq->handle_aio_output = NULL;
3595     }
3596 }
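/*
 * While an AioContext handler is attached, the poll_begin/poll_end hooks
 * above disable and re-enable guest->host notifications around adaptive
 * polling, so the ring is checked directly instead of waiting for an
 * ioeventfd write.  A minimal sketch of attaching a dataplane handler
 * (handler name and context setup are illustrative):
 *
 *     static bool handle_vq_aio(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         // process requests; return true if any progress was made
 *         return false;
 *     }
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, handle_vq_aio);
 *     aio_context_release(ctx);
 *
 * Passing NULL as the handler detaches the notifier from ctx again.
 */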
3597 
3598 void virtio_queue_host_notifier_read(EventNotifier *n)
3599 {
3600     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3601     if (event_notifier_test_and_clear(n)) {
3602         virtio_queue_notify_vq(vq);
3603     }
3604 }
3605 
3606 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3607 {
3608     return &vq->host_notifier;
3609 }
3610 
3611 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3612 {
3613     return &vdev->config_notifier;
3614 }
3615 
3616 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3617 {
3618     vq->host_notifier_enabled = enabled;
3619 }
3620 
3621 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3622                                       MemoryRegion *mr, bool assign)
3623 {
3624     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3625     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3626 
3627     if (k->set_host_notifier_mr) {
3628         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3629     }
3630 
3631     return -1;
3632 }
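/*
 * Mapping a host notifier as a memory region is a transport-specific
 * optimisation (used e.g. for vhost-user host notifiers); transports that
 * do not implement set_host_notifier_mr report failure here and callers
 * fall back to the eventfd path.
 */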
3633 
3634 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3635 {
3636     g_free(vdev->bus_name);
3637     vdev->bus_name = g_strdup(bus_name);
3638 }
3639 
3640 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3641 {
3642     va_list ap;
3643 
3644     va_start(ap, fmt);
3645     error_vreport(fmt, ap);
3646     va_end(ap);
3647 
3648     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3649         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3650         virtio_notify_config(vdev);
3651     }
3652 
3653     vdev->broken = true;
3654 }
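/*
 * Device models call virtio_error() when the guest hands them something
 * that cannot be processed safely; for VERSION_1 devices this latches
 * NEEDS_RESET instead of terminating the guest.  A hypothetical handler
 * fragment (the request-header check is illustrative) might look like:
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem && elem->out_num < 1) {
 *         virtio_error(vdev, "missing request header in queue %d",
 *                      virtio_get_queue_index(vq));
 *         virtqueue_detach_element(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 */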
3655 
3656 static void virtio_memory_listener_commit(MemoryListener *listener)
3657 {
3658     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3659     int i;
3660 
3661     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3662         if (vdev->vq[i].vring.num == 0) {
3663             break;
3664         }
3665         virtio_init_region_cache(vdev, i);
3666     }
3667 }
3668 
3669 static void virtio_device_realize(DeviceState *dev, Error **errp)
3670 {
3671     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3672     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3673     Error *err = NULL;
3674 
3675     /* Devices should either use vmsd or the load/save methods */
3676     assert(!vdc->vmsd || !vdc->load);
3677 
3678     if (vdc->realize != NULL) {
3679         vdc->realize(dev, &err);
3680         if (err != NULL) {
3681             error_propagate(errp, err);
3682             return;
3683         }
3684     }
3685 
3686     virtio_bus_device_plugged(vdev, &err);
3687     if (err != NULL) {
3688         error_propagate(errp, err);
3689         vdc->unrealize(dev);
3690         return;
3691     }
3692 
3693     vdev->listener.commit = virtio_memory_listener_commit;
3694     vdev->listener.name = "virtio";
3695     memory_listener_register(&vdev->listener, vdev->dma_as);
3696 }
3697 
3698 static void virtio_device_unrealize(DeviceState *dev)
3699 {
3700     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3701     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3702 
3703     memory_listener_unregister(&vdev->listener);
3704     virtio_bus_device_unplugged(vdev);
3705 
3706     if (vdc->unrealize != NULL) {
3707         vdc->unrealize(dev);
3708     }
3709 
3710     g_free(vdev->bus_name);
3711     vdev->bus_name = NULL;
3712 }
3713 
3714 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3715 {
3716     int i;
3717     if (!vdev->vq) {
3718         return;
3719     }
3720 
3721     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3722         if (vdev->vq[i].vring.num == 0) {
3723             break;
3724         }
3725         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3726     }
3727     g_free(vdev->vq);
3728 }
3729 
3730 static void virtio_device_instance_finalize(Object *obj)
3731 {
3732     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3733 
3734     virtio_device_free_virtqueues(vdev);
3735 
3736     g_free(vdev->config);
3737     g_free(vdev->vector_queues);
3738 }
3739 
3740 static Property virtio_properties[] = {
3741     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3742     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3743     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3744     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3745                      disable_legacy_check, false),
3746     DEFINE_PROP_END_OF_LIST(),
3747 };
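/*
 * These properties are compatibility knobs shared by every virtio device:
 * "use-started" and "use-disabled-flag" preserve the behaviour of older
 * machine types across migration, and "x-disable-legacy-check" relaxes the
 * legacy-mode check that transports perform through
 * virtio_legacy_check_disabled() when a modern-only device sits on a
 * legacy-capable transport.  They are usually set through machine compat
 * properties rather than by users, e.g. an entry along the lines of
 *     { "virtio-device", "use-started", "false" }
 * in a hw_compat_* table.
 */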
3748 
3749 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3750 {
3751     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3752     int i, n, r, err;
3753 
3754     /*
3755      * Batch all the host notifiers in a single transaction to avoid
3756      * quadratic time complexity in address_space_update_ioeventfds().
3757      */
3758     memory_region_transaction_begin();
3759     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3760         VirtQueue *vq = &vdev->vq[n];
3761         if (!virtio_queue_get_num(vdev, n)) {
3762             continue;
3763         }
3764         r = virtio_bus_set_host_notifier(qbus, n, true);
3765         if (r < 0) {
3766             err = r;
3767             goto assign_error;
3768         }
3769         event_notifier_set_handler(&vq->host_notifier,
3770                                    virtio_queue_host_notifier_read);
3771     }
3772 
3773     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3774         /* Kick right away to begin processing requests already in vring */
3775         VirtQueue *vq = &vdev->vq[n];
3776         if (!vq->vring.num) {
3777             continue;
3778         }
3779         event_notifier_set(&vq->host_notifier);
3780     }
3781     memory_region_transaction_commit();
3782     return 0;
3783 
3784 assign_error:
3785     i = n; /* save n for a second iteration after transaction is committed. */
3786     while (--n >= 0) {
3787         VirtQueue *vq = &vdev->vq[n];
3788         if (!virtio_queue_get_num(vdev, n)) {
3789             continue;
3790         }
3791 
3792         event_notifier_set_handler(&vq->host_notifier, NULL);
3793         r = virtio_bus_set_host_notifier(qbus, n, false);
3794         assert(r >= 0);
3795     }
3796     /*
3797      * The transaction expects the ioeventfds to be open when it
3798      * commits. Do it now, before the cleanup loop.
3799      */
3800     memory_region_transaction_commit();
3801 
3802     while (--i >= 0) {
3803         if (!virtio_queue_get_num(vdev, i)) {
3804             continue;
3805         }
3806         virtio_bus_cleanup_host_notifier(qbus, i);
3807     }
3808     return err;
3809 }
3810 
3811 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3812 {
3813     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3814     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3815 
3816     return virtio_bus_start_ioeventfd(vbus);
3817 }
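/*
 * Note that virtio_device_start_ioeventfd() goes through the bus rather
 * than calling the _impl function above directly: the bus decides whether
 * ioeventfd is enabled at all and then invokes the start_ioeventfd /
 * stop_ioeventfd hooks installed in virtio_device_class_init() below.
 */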
3818 
3819 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3820 {
3821     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3822     int n, r;
3823 
3824     /*
3825      * Batch all the host notifiers in a single transaction to avoid
3826      * quadratic time complexity in address_space_update_ioeventfds().
3827      */
3828     memory_region_transaction_begin();
3829     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3830         VirtQueue *vq = &vdev->vq[n];
3831 
3832         if (!virtio_queue_get_num(vdev, n)) {
3833             continue;
3834         }
3835         event_notifier_set_handler(&vq->host_notifier, NULL);
3836         r = virtio_bus_set_host_notifier(qbus, n, false);
3837         assert(r >= 0);
3838     }
3839     /*
3840      * The transaction expects the ioeventfds to be open when it
3841      * commits. Do it now, before the cleanup loop.
3842      */
3843     memory_region_transaction_commit();
3844 
3845     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3846         if (!virtio_queue_get_num(vdev, n)) {
3847             continue;
3848         }
3849         virtio_bus_cleanup_host_notifier(qbus, n);
3850     }
3851 }
3852 
3853 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3854 {
3855     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3856     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3857 
3858     return virtio_bus_grab_ioeventfd(vbus);
3859 }
3860 
3861 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3862 {
3863     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3864     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3865 
3866     virtio_bus_release_ioeventfd(vbus);
3867 }
3868 
3869 static void virtio_device_class_init(ObjectClass *klass, void *data)
3870 {
3871     /* Set the default value here. */
3872     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3873     DeviceClass *dc = DEVICE_CLASS(klass);
3874 
3875     dc->realize = virtio_device_realize;
3876     dc->unrealize = virtio_device_unrealize;
3877     dc->bus_type = TYPE_VIRTIO_BUS;
3878     device_class_set_props(dc, virtio_properties);
3879     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3880     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3881 
3882     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3883 }
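/*
 * Concrete devices subclass TYPE_VIRTIO_DEVICE and fill in the hooks left
 * unset here.  A minimal, hypothetical subclass (names are illustrative and
 * the referenced handlers are assumed to be defined elsewhere) would look
 * roughly like:
 *
 *     static void virtio_foo_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *
 *         // device_id comes from standard-headers/linux/virtio_ids.h
 *         virtio_init(vdev, "virtio-foo", device_id, config_size);
 *         virtio_add_queue(vdev, 128, virtio_foo_handle_output);
 *     }
 *
 *     static void virtio_foo_class_init(ObjectClass *klass, void *data)
 *     {
 *         VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 *
 *         vdc->realize = virtio_foo_realize;
 *         vdc->unrealize = virtio_foo_unrealize;
 *         vdc->get_features = virtio_foo_get_features;
 *     }
 */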
3884 
3885 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3886 {
3887     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3888     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3889 
3890     return virtio_bus_ioeventfd_enabled(vbus);
3891 }
3892 
3893 static const TypeInfo virtio_device_info = {
3894     .name = TYPE_VIRTIO_DEVICE,
3895     .parent = TYPE_DEVICE,
3896     .instance_size = sizeof(VirtIODevice),
3897     .class_init = virtio_device_class_init,
3898     .instance_finalize = virtio_device_instance_finalize,
3899     .abstract = true,
3900     .class_size = sizeof(VirtioDeviceClass),
3901 };
3902 
3903 static void virtio_register_types(void)
3904 {
3905     type_register_static(&virtio_device_info);
3906 }
3907 
3908 type_init(virtio_register_types)
3909