xref: /openbmc/qemu/hw/virtio/virtio.c (revision 740b1759)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "exec/address-spaces.h"
19 #include "qemu/error-report.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 *
 * NOTE(review): the macro is not referenced in the part of the file visible
 * here; presumably it seeds VRing.align when a queue is created -- confirm.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
38 
/*
 * One entry of a split-ring descriptor table.  vring_split_desc_read() copies
 * an entry out of guest memory into this struct and fixes up the byte order
 * of every field, so the field order and sizes must not change.
 */
typedef struct VRingDesc
{
    /* For an indirect descriptor: address of the indirect table that gets
     * mapped.  NOTE(review): otherwise presumably the buffer address. */
    uint64_t addr;
    /* For an indirect descriptor: table size in bytes; must be a non-zero
     * multiple of sizeof(VRingDesc). */
    uint32_t len;
    /* Tested against VRING_DESC_F_NEXT and VRING_DESC_F_INDIRECT. */
    uint16_t flags;
    /* Index of the next descriptor in the chain; only followed when
     * VRING_DESC_F_NEXT is set in @flags. */
    uint16_t next;
} VRingDesc;
46 
/*
 * One entry of a packed-ring descriptor area.  The fields are read and
 * written individually at their offsetof() positions, so the layout must not
 * change.
 */
typedef struct VRingPackedDesc {
    uint64_t addr;   /* read by vring_packed_desc_read(); never written back */
    uint32_t len;    /* written back by vring_packed_desc_write_data() */
    uint16_t id;     /* written back by vring_packed_desc_write_data() */
    /* Carries the VRING_PACKED_DESC_F_AVAIL / _USED bits checked by
     * is_desc_avail(); written last by vring_packed_desc_write(). */
    uint16_t flags;
} VRingPackedDesc;
53 
/*
 * Header of the split-ring "avail" area.  Only used for offsetof()
 * computations; the data itself is accessed through the avail region cache.
 */
typedef struct VRingAvail
{
    uint16_t flags;   /* read by vring_avail_flags() */
    uint16_t idx;     /* read by vring_avail_idx(), mirrored in shadow_avail_idx */
    /* Descriptor head numbers.  The slot just past the ring, ring[num], is
     * what vring_get_used_event() reads. */
    uint16_t ring[];
} VRingAvail;
60 
/*
 * One entry of the split-ring "used" array, written to guest memory by
 * vring_used_write() after byte-order conversion.
 */
typedef struct VRingUsedElem
{
    uint32_t id;    /* set from VirtQueueElement.index by virtqueue_split_fill() */
    uint32_t len;   /* set from the @len argument of virtqueue_split_fill() */
} VRingUsedElem;
66 
/*
 * Header of the split-ring "used" area.  Only used for offsetof()
 * computations; the data itself is accessed through the used region cache.
 */
typedef struct VRingUsed
{
    uint16_t flags;   /* VRING_USED_F_NO_NOTIFY is set/cleared here */
    uint16_t idx;     /* written by vring_used_idx_set() */
    /* Used entries.  The slot just past the ring, ring[num], is where
     * vring_set_avail_event() stores its value. */
    VRingUsedElem ring[];
} VRingUsed;
73 
/*
 * The set of region caches belonging to one virtqueue.  Instances are
 * published through VRing.caches with qatomic_rcu_set() and released via
 * call_rcu(), which uses the embedded @rcu head.
 */
typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;       /* handed to call_rcu() for deferred freeing */
    MemoryRegionCache desc;    /* maps the range starting at VRing.desc */
    MemoryRegionCache avail;   /* maps the range starting at VRing.avail */
    MemoryRegionCache used;    /* maps the range starting at VRing.used */
} VRingMemoryRegionCaches;
80 
/*
 * Location and size of one ring plus the RCU-protected caches mapping it.
 */
typedef struct VRing
{
    unsigned int num;           /* ring size; used as modulus and bound for indices */
    unsigned int num_default;   /* NOTE(review): not used in the visible code; presumably the reset value of @num */
    unsigned int align;         /* alignment applied to @used by virtio_queue_update_rings() */
    hwaddr desc;                /* descriptor area address; 0 means "not set up" */
    hwaddr avail;               /* avail area address; non-zero means the queue is ready */
    hwaddr used;                /* used area address */
    /* Read with qatomic_rcu_read(), replaced with qatomic_rcu_set(); may be
     * NULL when no mapping exists. */
    VRingMemoryRegionCaches *caches;
} VRing;
91 
/*
 * Packed-ring event suppression structure.  Both fields are accessed
 * individually at their offsetof() positions.
 */
typedef struct VRingPackedDescEvent {
    /* The device side stores "index | wrap_counter << 15" here (see
     * virtio_queue_packed_set_notification()). */
    uint16_t off_wrap;
    /* One of the VRING_PACKED_EVENT_FLAG_* values. */
    uint16_t flags;
} VRingPackedDescEvent ;
96 
/*
 * Device-side state of a single virtqueue.
 */
struct VirtQueue
{
    VRing vring;
    /* Per-slot bookkeeping filled by virtqueue_packed_fill() and consumed by
     * virtqueue_packed_flush(). */
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    /* Device-side copy of the used index (and wrap counter for packed rings) */
    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used can be relied on; cleared by
     * virtqueue_split_flush() when the used index moves past it. */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Outstanding descriptors: decremented on flush, detach and rewind. */
    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleAIOOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};
135 
136 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
137 {
138     if (!caches) {
139         return;
140     }
141 
142     address_space_cache_destroy(&caches->desc);
143     address_space_cache_destroy(&caches->avail);
144     address_space_cache_destroy(&caches->used);
145     g_free(caches);
146 }
147 
148 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
149 {
150     VRingMemoryRegionCaches *caches;
151 
152     caches = qatomic_read(&vq->vring.caches);
153     qatomic_rcu_set(&vq->vring.caches, NULL);
154     if (caches) {
155         call_rcu(caches, virtio_free_region_cache, rcu);
156     }
157 }
158 
159 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
160 {
161     VirtQueue *vq = &vdev->vq[n];
162     VRingMemoryRegionCaches *old = vq->vring.caches;
163     VRingMemoryRegionCaches *new = NULL;
164     hwaddr addr, size;
165     int64_t len;
166     bool packed;
167 
168 
169     addr = vq->vring.desc;
170     if (!addr) {
171         goto out_no_cache;
172     }
173     new = g_new0(VRingMemoryRegionCaches, 1);
174     size = virtio_queue_get_desc_size(vdev, n);
175     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
176                                    true : false;
177     len = address_space_cache_init(&new->desc, vdev->dma_as,
178                                    addr, size, packed);
179     if (len < size) {
180         virtio_error(vdev, "Cannot map desc");
181         goto err_desc;
182     }
183 
184     size = virtio_queue_get_used_size(vdev, n);
185     len = address_space_cache_init(&new->used, vdev->dma_as,
186                                    vq->vring.used, size, true);
187     if (len < size) {
188         virtio_error(vdev, "Cannot map used");
189         goto err_used;
190     }
191 
192     size = virtio_queue_get_avail_size(vdev, n);
193     len = address_space_cache_init(&new->avail, vdev->dma_as,
194                                    vq->vring.avail, size, false);
195     if (len < size) {
196         virtio_error(vdev, "Cannot map avail");
197         goto err_avail;
198     }
199 
200     qatomic_rcu_set(&vq->vring.caches, new);
201     if (old) {
202         call_rcu(old, virtio_free_region_cache, rcu);
203     }
204     return;
205 
206 err_avail:
207     address_space_cache_destroy(&new->avail);
208 err_used:
209     address_space_cache_destroy(&new->used);
210 err_desc:
211     address_space_cache_destroy(&new->desc);
212 out_no_cache:
213     g_free(new);
214     virtio_virtqueue_reset_region_cache(vq);
215 }
216 
217 /* virt queue functions */
218 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
219 {
220     VRing *vring = &vdev->vq[n].vring;
221 
222     if (!vring->num || !vring->desc || !vring->align) {
223         /* not yet setup -> nothing to do */
224         return;
225     }
226     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
227     vring->used = vring_align(vring->avail +
228                               offsetof(VRingAvail, ring[vring->num]),
229                               vring->align);
230     virtio_init_region_cache(vdev, n);
231 }
232 
233 /* Called within rcu_read_lock().  */
234 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
235                                   MemoryRegionCache *cache, int i)
236 {
237     address_space_read_cached(cache, i * sizeof(VRingDesc),
238                               desc, sizeof(VRingDesc));
239     virtio_tswap64s(vdev, &desc->addr);
240     virtio_tswap32s(vdev, &desc->len);
241     virtio_tswap16s(vdev, &desc->flags);
242     virtio_tswap16s(vdev, &desc->next);
243 }
244 
245 static void vring_packed_event_read(VirtIODevice *vdev,
246                                     MemoryRegionCache *cache,
247                                     VRingPackedDescEvent *e)
248 {
249     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
250     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
251 
252     address_space_read_cached(cache, off_flags, &e->flags,
253                               sizeof(e->flags));
254     /* Make sure flags is seen before off_wrap */
255     smp_rmb();
256     address_space_read_cached(cache, off_off, &e->off_wrap,
257                               sizeof(e->off_wrap));
258     virtio_tswap16s(vdev, &e->off_wrap);
259     virtio_tswap16s(vdev, &e->flags);
260 }
261 
262 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
263                                         MemoryRegionCache *cache,
264                                         uint16_t off_wrap)
265 {
266     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
267 
268     virtio_tswap16s(vdev, &off_wrap);
269     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
270     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
271 }
272 
273 static void vring_packed_flags_write(VirtIODevice *vdev,
274                                      MemoryRegionCache *cache, uint16_t flags)
275 {
276     hwaddr off = offsetof(VRingPackedDescEvent, flags);
277 
278     virtio_tswap16s(vdev, &flags);
279     address_space_write_cached(cache, off, &flags, sizeof(flags));
280     address_space_cache_invalidate(cache, off, sizeof(flags));
281 }
282 
283 /* Called within rcu_read_lock().  */
284 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
285 {
286     return qatomic_rcu_read(&vq->vring.caches);
287 }
288 
289 /* Called within rcu_read_lock().  */
290 static inline uint16_t vring_avail_flags(VirtQueue *vq)
291 {
292     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
293     hwaddr pa = offsetof(VRingAvail, flags);
294 
295     if (!caches) {
296         return 0;
297     }
298 
299     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
300 }
301 
302 /* Called within rcu_read_lock().  */
303 static inline uint16_t vring_avail_idx(VirtQueue *vq)
304 {
305     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
306     hwaddr pa = offsetof(VRingAvail, idx);
307 
308     if (!caches) {
309         return 0;
310     }
311 
312     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
313     return vq->shadow_avail_idx;
314 }
315 
316 /* Called within rcu_read_lock().  */
317 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
318 {
319     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
320     hwaddr pa = offsetof(VRingAvail, ring[i]);
321 
322     if (!caches) {
323         return 0;
324     }
325 
326     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
327 }
328 
329 /* Called within rcu_read_lock().  */
330 static inline uint16_t vring_get_used_event(VirtQueue *vq)
331 {
332     return vring_avail_ring(vq, vq->vring.num);
333 }
334 
335 /* Called within rcu_read_lock().  */
336 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
337                                     int i)
338 {
339     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
340     hwaddr pa = offsetof(VRingUsed, ring[i]);
341 
342     if (!caches) {
343         return;
344     }
345 
346     virtio_tswap32s(vq->vdev, &uelem->id);
347     virtio_tswap32s(vq->vdev, &uelem->len);
348     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
349     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
350 }
351 
352 /* Called within rcu_read_lock().  */
353 static uint16_t vring_used_idx(VirtQueue *vq)
354 {
355     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
356     hwaddr pa = offsetof(VRingUsed, idx);
357 
358     if (!caches) {
359         return 0;
360     }
361 
362     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
363 }
364 
365 /* Called within rcu_read_lock().  */
366 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
367 {
368     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
369     hwaddr pa = offsetof(VRingUsed, idx);
370 
371     if (caches) {
372         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
373         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
374     }
375 
376     vq->used_idx = val;
377 }
378 
379 /* Called within rcu_read_lock().  */
380 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
381 {
382     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
383     VirtIODevice *vdev = vq->vdev;
384     hwaddr pa = offsetof(VRingUsed, flags);
385     uint16_t flags;
386 
387     if (!caches) {
388         return;
389     }
390 
391     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
392     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
393     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
394 }
395 
396 /* Called within rcu_read_lock().  */
397 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
398 {
399     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
400     VirtIODevice *vdev = vq->vdev;
401     hwaddr pa = offsetof(VRingUsed, flags);
402     uint16_t flags;
403 
404     if (!caches) {
405         return;
406     }
407 
408     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
409     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
410     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
411 }
412 
413 /* Called within rcu_read_lock().  */
414 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
415 {
416     VRingMemoryRegionCaches *caches;
417     hwaddr pa;
418     if (!vq->notification) {
419         return;
420     }
421 
422     caches = vring_get_region_caches(vq);
423     if (!caches) {
424         return;
425     }
426 
427     pa = offsetof(VRingUsed, ring[vq->vring.num]);
428     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
429     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
430 }
431 
432 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
433 {
434     RCU_READ_LOCK_GUARD();
435 
436     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
437         vring_set_avail_event(vq, vring_avail_idx(vq));
438     } else if (enable) {
439         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
440     } else {
441         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
442     }
443     if (enable) {
444         /* Expose avail event/used flags before caller checks the avail idx. */
445         smp_mb();
446     }
447 }
448 
449 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
450 {
451     uint16_t off_wrap;
452     VRingPackedDescEvent e;
453     VRingMemoryRegionCaches *caches;
454 
455     RCU_READ_LOCK_GUARD();
456     caches = vring_get_region_caches(vq);
457     if (!caches) {
458         return;
459     }
460 
461     vring_packed_event_read(vq->vdev, &caches->used, &e);
462 
463     if (!enable) {
464         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
465     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
466         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
467         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
468         /* Make sure off_wrap is wrote before flags */
469         smp_wmb();
470         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
471     } else {
472         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
473     }
474 
475     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
476     if (enable) {
477         /* Expose avail event/used flags before caller checks the avail idx. */
478         smp_mb();
479     }
480 }
481 
482 bool virtio_queue_get_notification(VirtQueue *vq)
483 {
484     return vq->notification;
485 }
486 
487 void virtio_queue_set_notification(VirtQueue *vq, int enable)
488 {
489     vq->notification = enable;
490 
491     if (!vq->vring.desc) {
492         return;
493     }
494 
495     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
496         virtio_queue_packed_set_notification(vq, enable);
497     } else {
498         virtio_queue_split_set_notification(vq, enable);
499     }
500 }
501 
502 int virtio_queue_ready(VirtQueue *vq)
503 {
504     return vq->vring.avail != 0;
505 }
506 
507 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
508                                          uint16_t *flags,
509                                          MemoryRegionCache *cache,
510                                          int i)
511 {
512     address_space_read_cached(cache,
513                               i * sizeof(VRingPackedDesc) +
514                               offsetof(VRingPackedDesc, flags),
515                               flags, sizeof(*flags));
516     virtio_tswap16s(vdev, flags);
517 }
518 
519 static void vring_packed_desc_read(VirtIODevice *vdev,
520                                    VRingPackedDesc *desc,
521                                    MemoryRegionCache *cache,
522                                    int i, bool strict_order)
523 {
524     hwaddr off = i * sizeof(VRingPackedDesc);
525 
526     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
527 
528     if (strict_order) {
529         /* Make sure flags is read before the rest fields. */
530         smp_rmb();
531     }
532 
533     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
534                               &desc->addr, sizeof(desc->addr));
535     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
536                               &desc->id, sizeof(desc->id));
537     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
538                               &desc->len, sizeof(desc->len));
539     virtio_tswap64s(vdev, &desc->addr);
540     virtio_tswap16s(vdev, &desc->id);
541     virtio_tswap32s(vdev, &desc->len);
542 }
543 
544 static void vring_packed_desc_write_data(VirtIODevice *vdev,
545                                          VRingPackedDesc *desc,
546                                          MemoryRegionCache *cache,
547                                          int i)
548 {
549     hwaddr off_id = i * sizeof(VRingPackedDesc) +
550                     offsetof(VRingPackedDesc, id);
551     hwaddr off_len = i * sizeof(VRingPackedDesc) +
552                     offsetof(VRingPackedDesc, len);
553 
554     virtio_tswap32s(vdev, &desc->len);
555     virtio_tswap16s(vdev, &desc->id);
556     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
557     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
558     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
559     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
560 }
561 
562 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
563                                           VRingPackedDesc *desc,
564                                           MemoryRegionCache *cache,
565                                           int i)
566 {
567     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
568 
569     virtio_tswap16s(vdev, &desc->flags);
570     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
571     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
572 }
573 
574 static void vring_packed_desc_write(VirtIODevice *vdev,
575                                     VRingPackedDesc *desc,
576                                     MemoryRegionCache *cache,
577                                     int i, bool strict_order)
578 {
579     vring_packed_desc_write_data(vdev, desc, cache, i);
580     if (strict_order) {
581         /* Make sure data is wrote before flags. */
582         smp_wmb();
583     }
584     vring_packed_desc_write_flags(vdev, desc, cache, i);
585 }
586 
587 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
588 {
589     bool avail, used;
590 
591     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
592     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
593     return (avail != used) && (avail == wrap_counter);
594 }
595 
596 /* Fetch avail_idx from VQ memory only when we really need to know if
597  * guest has added some buffers.
598  * Called within rcu_read_lock().  */
599 static int virtio_queue_empty_rcu(VirtQueue *vq)
600 {
601     if (virtio_device_disabled(vq->vdev)) {
602         return 1;
603     }
604 
605     if (unlikely(!vq->vring.avail)) {
606         return 1;
607     }
608 
609     if (vq->shadow_avail_idx != vq->last_avail_idx) {
610         return 0;
611     }
612 
613     return vring_avail_idx(vq) == vq->last_avail_idx;
614 }
615 
616 static int virtio_queue_split_empty(VirtQueue *vq)
617 {
618     bool empty;
619 
620     if (virtio_device_disabled(vq->vdev)) {
621         return 1;
622     }
623 
624     if (unlikely(!vq->vring.avail)) {
625         return 1;
626     }
627 
628     if (vq->shadow_avail_idx != vq->last_avail_idx) {
629         return 0;
630     }
631 
632     RCU_READ_LOCK_GUARD();
633     empty = vring_avail_idx(vq) == vq->last_avail_idx;
634     return empty;
635 }
636 
637 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
638 {
639     struct VRingPackedDesc desc;
640     VRingMemoryRegionCaches *cache;
641 
642     if (unlikely(!vq->vring.desc)) {
643         return 1;
644     }
645 
646     cache = vring_get_region_caches(vq);
647     if (!cache) {
648         return 1;
649     }
650 
651     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
652                                  vq->last_avail_idx);
653 
654     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
655 }
656 
657 static int virtio_queue_packed_empty(VirtQueue *vq)
658 {
659     RCU_READ_LOCK_GUARD();
660     return virtio_queue_packed_empty_rcu(vq);
661 }
662 
663 int virtio_queue_empty(VirtQueue *vq)
664 {
665     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
666         return virtio_queue_packed_empty(vq);
667     } else {
668         return virtio_queue_split_empty(vq);
669     }
670 }
671 
672 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
673                                unsigned int len)
674 {
675     AddressSpace *dma_as = vq->vdev->dma_as;
676     unsigned int offset;
677     int i;
678 
679     offset = 0;
680     for (i = 0; i < elem->in_num; i++) {
681         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
682 
683         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
684                          elem->in_sg[i].iov_len,
685                          DMA_DIRECTION_FROM_DEVICE, size);
686 
687         offset += size;
688     }
689 
690     for (i = 0; i < elem->out_num; i++)
691         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
692                          elem->out_sg[i].iov_len,
693                          DMA_DIRECTION_TO_DEVICE,
694                          elem->out_sg[i].iov_len);
695 }
696 
697 /* virtqueue_detach_element:
698  * @vq: The #VirtQueue
699  * @elem: The #VirtQueueElement
700  * @len: number of bytes written
701  *
702  * Detach the element from the virtqueue.  This function is suitable for device
703  * reset or other situations where a #VirtQueueElement is simply freed and will
704  * not be pushed or discarded.
705  */
706 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
707                               unsigned int len)
708 {
709     vq->inuse -= elem->ndescs;
710     virtqueue_unmap_sg(vq, elem, len);
711 }
712 
713 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
714 {
715     vq->last_avail_idx -= num;
716 }
717 
718 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
719 {
720     if (vq->last_avail_idx < num) {
721         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
722         vq->last_avail_wrap_counter ^= 1;
723     } else {
724         vq->last_avail_idx -= num;
725     }
726 }
727 
728 /* virtqueue_unpop:
729  * @vq: The #VirtQueue
730  * @elem: The #VirtQueueElement
731  * @len: number of bytes written
732  *
733  * Pretend the most recent element wasn't popped from the virtqueue.  The next
734  * call to virtqueue_pop() will refetch the element.
735  */
736 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
737                      unsigned int len)
738 {
739 
740     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
741         virtqueue_packed_rewind(vq, 1);
742     } else {
743         virtqueue_split_rewind(vq, 1);
744     }
745 
746     virtqueue_detach_element(vq, elem, len);
747 }
748 
749 /* virtqueue_rewind:
750  * @vq: The #VirtQueue
751  * @num: Number of elements to push back
752  *
753  * Pretend that elements weren't popped from the virtqueue.  The next
754  * virtqueue_pop() will refetch the oldest element.
755  *
756  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
757  *
758  * Returns: true on success, false if @num is greater than the number of in use
759  * elements.
760  */
761 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
762 {
763     if (num > vq->inuse) {
764         return false;
765     }
766 
767     vq->inuse -= num;
768     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
769         virtqueue_packed_rewind(vq, num);
770     } else {
771         virtqueue_split_rewind(vq, num);
772     }
773     return true;
774 }
775 
776 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
777                     unsigned int len, unsigned int idx)
778 {
779     VRingUsedElem uelem;
780 
781     if (unlikely(!vq->vring.used)) {
782         return;
783     }
784 
785     idx = (idx + vq->used_idx) % vq->vring.num;
786 
787     uelem.id = elem->index;
788     uelem.len = len;
789     vring_used_write(vq, &uelem, idx);
790 }
791 
792 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
793                                   unsigned int len, unsigned int idx)
794 {
795     vq->used_elems[idx].index = elem->index;
796     vq->used_elems[idx].len = len;
797     vq->used_elems[idx].ndescs = elem->ndescs;
798 }
799 
800 static void virtqueue_packed_fill_desc(VirtQueue *vq,
801                                        const VirtQueueElement *elem,
802                                        unsigned int idx,
803                                        bool strict_order)
804 {
805     uint16_t head;
806     VRingMemoryRegionCaches *caches;
807     VRingPackedDesc desc = {
808         .id = elem->index,
809         .len = elem->len,
810     };
811     bool wrap_counter = vq->used_wrap_counter;
812 
813     if (unlikely(!vq->vring.desc)) {
814         return;
815     }
816 
817     head = vq->used_idx + idx;
818     if (head >= vq->vring.num) {
819         head -= vq->vring.num;
820         wrap_counter ^= 1;
821     }
822     if (wrap_counter) {
823         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
824         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
825     } else {
826         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
827         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
828     }
829 
830     caches = vring_get_region_caches(vq);
831     if (!caches) {
832         return;
833     }
834 
835     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
836 }
837 
838 /* Called within rcu_read_lock().  */
839 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
840                     unsigned int len, unsigned int idx)
841 {
842     trace_virtqueue_fill(vq, elem, len, idx);
843 
844     virtqueue_unmap_sg(vq, elem, len);
845 
846     if (virtio_device_disabled(vq->vdev)) {
847         return;
848     }
849 
850     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
851         virtqueue_packed_fill(vq, elem, len, idx);
852     } else {
853         virtqueue_split_fill(vq, elem, len, idx);
854     }
855 }
856 
857 /* Called within rcu_read_lock().  */
858 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
859 {
860     uint16_t old, new;
861 
862     if (unlikely(!vq->vring.used)) {
863         return;
864     }
865 
866     /* Make sure buffer is written before we update index. */
867     smp_wmb();
868     trace_virtqueue_flush(vq, count);
869     old = vq->used_idx;
870     new = old + count;
871     vring_used_idx_set(vq, new);
872     vq->inuse -= count;
873     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
874         vq->signalled_used_valid = false;
875 }
876 
877 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
878 {
879     unsigned int i, ndescs = 0;
880 
881     if (unlikely(!vq->vring.desc)) {
882         return;
883     }
884 
885     for (i = 1; i < count; i++) {
886         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
887         ndescs += vq->used_elems[i].ndescs;
888     }
889     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
890     ndescs += vq->used_elems[0].ndescs;
891 
892     vq->inuse -= ndescs;
893     vq->used_idx += ndescs;
894     if (vq->used_idx >= vq->vring.num) {
895         vq->used_idx -= vq->vring.num;
896         vq->used_wrap_counter ^= 1;
897     }
898 }
899 
900 void virtqueue_flush(VirtQueue *vq, unsigned int count)
901 {
902     if (virtio_device_disabled(vq->vdev)) {
903         vq->inuse -= count;
904         return;
905     }
906 
907     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
908         virtqueue_packed_flush(vq, count);
909     } else {
910         virtqueue_split_flush(vq, count);
911     }
912 }
913 
914 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len)
916 {
917     RCU_READ_LOCK_GUARD();
918     virtqueue_fill(vq, elem, len, 0);
919     virtqueue_flush(vq, 1);
920 }
921 
922 /* Called within rcu_read_lock().  */
923 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
924 {
925     uint16_t num_heads = vring_avail_idx(vq) - idx;
926 
927     /* Check it isn't doing very strange things with descriptor numbers. */
928     if (num_heads > vq->vring.num) {
929         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
930                      idx, vq->shadow_avail_idx);
931         return -EINVAL;
932     }
933     /* On success, callers read a descriptor at vq->last_avail_idx.
934      * Make sure descriptor read does not bypass avail index read. */
935     if (num_heads) {
936         smp_rmb();
937     }
938 
939     return num_heads;
940 }
941 
942 /* Called within rcu_read_lock().  */
943 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
944                                unsigned int *head)
945 {
946     /* Grab the next descriptor number they're advertising, and increment
947      * the index we've seen. */
948     *head = vring_avail_ring(vq, idx % vq->vring.num);
949 
950     /* If their number is silly, that's a fatal mistake. */
951     if (*head >= vq->vring.num) {
952         virtio_error(vq->vdev, "Guest says index %u is available", *head);
953         return false;
954     }
955 
956     return true;
957 }
958 
/* Result codes returned by virtqueue_split_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1, /* next index out of range; virtio_error() raised */
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
964 
965 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
966                                           MemoryRegionCache *desc_cache,
967                                           unsigned int max, unsigned int *next)
968 {
969     /* If this descriptor says it doesn't chain, we're done. */
970     if (!(desc->flags & VRING_DESC_F_NEXT)) {
971         return VIRTQUEUE_READ_DESC_DONE;
972     }
973 
974     /* Check they're not leading us off end of descriptors. */
975     *next = desc->next;
976     /* Make sure compiler knows to grab that: we don't want it changing! */
977     smp_wmb();
978 
979     if (*next >= max) {
980         virtio_error(vdev, "Desc next is %u", *next);
981         return VIRTQUEUE_READ_DESC_ERROR;
982     }
983 
984     vring_split_desc_read(vdev, desc, desc_cache, *next);
985     return VIRTQUEUE_READ_DESC_MORE;
986 }
987 
988 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
989                             unsigned int *in_bytes, unsigned int *out_bytes,
990                             unsigned max_in_bytes, unsigned max_out_bytes)
991 {
992     VirtIODevice *vdev = vq->vdev;
993     unsigned int max, idx;
994     unsigned int total_bufs, in_total, out_total;
995     VRingMemoryRegionCaches *caches;
996     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
997     int64_t len = 0;
998     int rc;
999 
1000     RCU_READ_LOCK_GUARD();
1001 
1002     idx = vq->last_avail_idx;
1003     total_bufs = in_total = out_total = 0;
1004 
1005     max = vq->vring.num;
1006     caches = vring_get_region_caches(vq);
1007     if (!caches) {
1008         goto err;
1009     }
1010 
1011     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1012         MemoryRegionCache *desc_cache = &caches->desc;
1013         unsigned int num_bufs;
1014         VRingDesc desc;
1015         unsigned int i;
1016 
1017         num_bufs = total_bufs;
1018 
1019         if (!virtqueue_get_head(vq, idx++, &i)) {
1020             goto err;
1021         }
1022 
1023         vring_split_desc_read(vdev, &desc, desc_cache, i);
1024 
1025         if (desc.flags & VRING_DESC_F_INDIRECT) {
1026             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1027                 virtio_error(vdev, "Invalid size for indirect buffer table");
1028                 goto err;
1029             }
1030 
1031             /* If we've got too many, that implies a descriptor loop. */
1032             if (num_bufs >= max) {
1033                 virtio_error(vdev, "Looped descriptor");
1034                 goto err;
1035             }
1036 
1037             /* loop over the indirect descriptor table */
1038             len = address_space_cache_init(&indirect_desc_cache,
1039                                            vdev->dma_as,
1040                                            desc.addr, desc.len, false);
1041             desc_cache = &indirect_desc_cache;
1042             if (len < desc.len) {
1043                 virtio_error(vdev, "Cannot map indirect buffer");
1044                 goto err;
1045             }
1046 
1047             max = desc.len / sizeof(VRingDesc);
1048             num_bufs = i = 0;
1049             vring_split_desc_read(vdev, &desc, desc_cache, i);
1050         }
1051 
1052         do {
1053             /* If we've got too many, that implies a descriptor loop. */
1054             if (++num_bufs > max) {
1055                 virtio_error(vdev, "Looped descriptor");
1056                 goto err;
1057             }
1058 
1059             if (desc.flags & VRING_DESC_F_WRITE) {
1060                 in_total += desc.len;
1061             } else {
1062                 out_total += desc.len;
1063             }
1064             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1065                 goto done;
1066             }
1067 
1068             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1069         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1070 
1071         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1072             goto err;
1073         }
1074 
1075         if (desc_cache == &indirect_desc_cache) {
1076             address_space_cache_destroy(&indirect_desc_cache);
1077             total_bufs++;
1078         } else {
1079             total_bufs = num_bufs;
1080         }
1081     }
1082 
1083     if (rc < 0) {
1084         goto err;
1085     }
1086 
1087 done:
1088     address_space_cache_destroy(&indirect_desc_cache);
1089     if (in_bytes) {
1090         *in_bytes = in_total;
1091     }
1092     if (out_bytes) {
1093         *out_bytes = out_total;
1094     }
1095     return;
1096 
1097 err:
1098     in_total = out_total = 0;
1099     goto done;
1100 }
1101 
1102 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1103                                            VRingPackedDesc *desc,
1104                                            MemoryRegionCache
1105                                            *desc_cache,
1106                                            unsigned int max,
1107                                            unsigned int *next,
1108                                            bool indirect)
1109 {
1110     /* If this descriptor says it doesn't chain, we're done. */
1111     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1112         return VIRTQUEUE_READ_DESC_DONE;
1113     }
1114 
1115     ++*next;
1116     if (*next == max) {
1117         if (indirect) {
1118             return VIRTQUEUE_READ_DESC_DONE;
1119         } else {
1120             (*next) -= vq->vring.num;
1121         }
1122     }
1123 
1124     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1125     return VIRTQUEUE_READ_DESC_MORE;
1126 }
1127 
1128 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1129                                              unsigned int *in_bytes,
1130                                              unsigned int *out_bytes,
1131                                              unsigned max_in_bytes,
1132                                              unsigned max_out_bytes)
1133 {
1134     VirtIODevice *vdev = vq->vdev;
1135     unsigned int max, idx;
1136     unsigned int total_bufs, in_total, out_total;
1137     MemoryRegionCache *desc_cache;
1138     VRingMemoryRegionCaches *caches;
1139     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1140     int64_t len = 0;
1141     VRingPackedDesc desc;
1142     bool wrap_counter;
1143 
1144     RCU_READ_LOCK_GUARD();
1145     idx = vq->last_avail_idx;
1146     wrap_counter = vq->last_avail_wrap_counter;
1147     total_bufs = in_total = out_total = 0;
1148 
1149     max = vq->vring.num;
1150     caches = vring_get_region_caches(vq);
1151     if (!caches) {
1152         goto err;
1153     }
1154 
1155     for (;;) {
1156         unsigned int num_bufs = total_bufs;
1157         unsigned int i = idx;
1158         int rc;
1159 
1160         desc_cache = &caches->desc;
1161         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1162         if (!is_desc_avail(desc.flags, wrap_counter)) {
1163             break;
1164         }
1165 
1166         if (desc.flags & VRING_DESC_F_INDIRECT) {
1167             if (desc.len % sizeof(VRingPackedDesc)) {
1168                 virtio_error(vdev, "Invalid size for indirect buffer table");
1169                 goto err;
1170             }
1171 
1172             /* If we've got too many, that implies a descriptor loop. */
1173             if (num_bufs >= max) {
1174                 virtio_error(vdev, "Looped descriptor");
1175                 goto err;
1176             }
1177 
1178             /* loop over the indirect descriptor table */
1179             len = address_space_cache_init(&indirect_desc_cache,
1180                                            vdev->dma_as,
1181                                            desc.addr, desc.len, false);
1182             desc_cache = &indirect_desc_cache;
1183             if (len < desc.len) {
1184                 virtio_error(vdev, "Cannot map indirect buffer");
1185                 goto err;
1186             }
1187 
1188             max = desc.len / sizeof(VRingPackedDesc);
1189             num_bufs = i = 0;
1190             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1191         }
1192 
1193         do {
1194             /* If we've got too many, that implies a descriptor loop. */
1195             if (++num_bufs > max) {
1196                 virtio_error(vdev, "Looped descriptor");
1197                 goto err;
1198             }
1199 
1200             if (desc.flags & VRING_DESC_F_WRITE) {
1201                 in_total += desc.len;
1202             } else {
1203                 out_total += desc.len;
1204             }
1205             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1206                 goto done;
1207             }
1208 
1209             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1210                                                  &i, desc_cache ==
1211                                                  &indirect_desc_cache);
1212         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1213 
1214         if (desc_cache == &indirect_desc_cache) {
1215             address_space_cache_destroy(&indirect_desc_cache);
1216             total_bufs++;
1217             idx++;
1218         } else {
1219             idx += num_bufs - total_bufs;
1220             total_bufs = num_bufs;
1221         }
1222 
1223         if (idx >= vq->vring.num) {
1224             idx -= vq->vring.num;
1225             wrap_counter ^= 1;
1226         }
1227     }
1228 
1229     /* Record the index and wrap counter for a kick we want */
1230     vq->shadow_avail_idx = idx;
1231     vq->shadow_avail_wrap_counter = wrap_counter;
1232 done:
1233     address_space_cache_destroy(&indirect_desc_cache);
1234     if (in_bytes) {
1235         *in_bytes = in_total;
1236     }
1237     if (out_bytes) {
1238         *out_bytes = out_total;
1239     }
1240     return;
1241 
1242 err:
1243     in_total = out_total = 0;
1244     goto done;
1245 }
1246 
1247 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1248                                unsigned int *out_bytes,
1249                                unsigned max_in_bytes, unsigned max_out_bytes)
1250 {
1251     uint16_t desc_size;
1252     VRingMemoryRegionCaches *caches;
1253 
1254     if (unlikely(!vq->vring.desc)) {
1255         goto err;
1256     }
1257 
1258     caches = vring_get_region_caches(vq);
1259     if (!caches) {
1260         goto err;
1261     }
1262 
1263     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1264                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1265     if (caches->desc.len < vq->vring.num * desc_size) {
1266         virtio_error(vq->vdev, "Cannot map descriptor ring");
1267         goto err;
1268     }
1269 
1270     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1271         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1272                                          max_in_bytes, max_out_bytes);
1273     } else {
1274         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1275                                         max_in_bytes, max_out_bytes);
1276     }
1277 
1278     return;
1279 err:
1280     if (in_bytes) {
1281         *in_bytes = 0;
1282     }
1283     if (out_bytes) {
1284         *out_bytes = 0;
1285     }
1286 }
1287 
1288 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1289                           unsigned int out_bytes)
1290 {
1291     unsigned int in_total, out_total;
1292 
1293     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1294     return in_bytes <= in_total && out_bytes <= out_total;
1295 }
1296 
1297 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1298                                hwaddr *addr, struct iovec *iov,
1299                                unsigned int max_num_sg, bool is_write,
1300                                hwaddr pa, size_t sz)
1301 {
1302     bool ok = false;
1303     unsigned num_sg = *p_num_sg;
1304     assert(num_sg <= max_num_sg);
1305 
1306     if (!sz) {
1307         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1308         goto out;
1309     }
1310 
1311     while (sz) {
1312         hwaddr len = sz;
1313 
1314         if (num_sg == max_num_sg) {
1315             virtio_error(vdev, "virtio: too many write descriptors in "
1316                                "indirect table");
1317             goto out;
1318         }
1319 
1320         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1321                                               is_write ?
1322                                               DMA_DIRECTION_FROM_DEVICE :
1323                                               DMA_DIRECTION_TO_DEVICE);
1324         if (!iov[num_sg].iov_base) {
1325             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1326             goto out;
1327         }
1328 
1329         iov[num_sg].iov_len = len;
1330         addr[num_sg] = pa;
1331 
1332         sz -= len;
1333         pa += len;
1334         num_sg++;
1335     }
1336     ok = true;
1337 
1338 out:
1339     *p_num_sg = num_sg;
1340     return ok;
1341 }
1342 
/* Error-path helper for the window in which no VirtQueueElement exists yet
 * (so virtqueue_unmap_sg() can't be used).  Unmaps the first @out_num +
 * @in_num entries of @iov; entries from @out_num onwards are unmapped as
 * writes.  Assumes buffers weren't written to yet, hence the access length
 * of 0.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int total = out_num + in_num;
    unsigned int n;

    for (n = 0; n < total; n++) {
        cpu_physical_memory_unmap(iov[n].iov_base, iov[n].iov_len,
                                  n >= out_num, 0);
    }
}
1359 
1360 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1361                                 hwaddr *addr, unsigned int num_sg,
1362                                 bool is_write)
1363 {
1364     unsigned int i;
1365     hwaddr len;
1366 
1367     for (i = 0; i < num_sg; i++) {
1368         len = sg[i].iov_len;
1369         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1370                                         addr[i], &len, is_write ?
1371                                         DMA_DIRECTION_FROM_DEVICE :
1372                                         DMA_DIRECTION_TO_DEVICE);
1373         if (!sg[i].iov_base) {
1374             error_report("virtio: error trying to map MMIO memory");
1375             exit(1);
1376         }
1377         if (len != sg[i].iov_len) {
1378             error_report("virtio: unexpected memory split");
1379             exit(1);
1380         }
1381     }
1382 }
1383 
1384 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1385 {
1386     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1387     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1388                                                                         false);
1389 }
1390 
1391 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1392 {
1393     VirtQueueElement *elem;
1394     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1395     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1396     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1397     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1398     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1399     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1400 
1401     assert(sz >= sizeof(VirtQueueElement));
1402     elem = g_malloc(out_sg_end);
1403     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1404     elem->out_num = out_num;
1405     elem->in_num = in_num;
1406     elem->in_addr = (void *)elem + in_addr_ofs;
1407     elem->out_addr = (void *)elem + out_addr_ofs;
1408     elem->in_sg = (void *)elem + in_sg_ofs;
1409     elem->out_sg = (void *)elem + out_sg_ofs;
1410     return elem;
1411 }
1412 
1413 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1414 {
1415     unsigned int i, head, max;
1416     VRingMemoryRegionCaches *caches;
1417     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1418     MemoryRegionCache *desc_cache;
1419     int64_t len;
1420     VirtIODevice *vdev = vq->vdev;
1421     VirtQueueElement *elem = NULL;
1422     unsigned out_num, in_num, elem_entries;
1423     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1424     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1425     VRingDesc desc;
1426     int rc;
1427 
1428     RCU_READ_LOCK_GUARD();
1429     if (virtio_queue_empty_rcu(vq)) {
1430         goto done;
1431     }
1432     /* Needed after virtio_queue_empty(), see comment in
1433      * virtqueue_num_heads(). */
1434     smp_rmb();
1435 
1436     /* When we start there are none of either input nor output. */
1437     out_num = in_num = elem_entries = 0;
1438 
1439     max = vq->vring.num;
1440 
1441     if (vq->inuse >= vq->vring.num) {
1442         virtio_error(vdev, "Virtqueue size exceeded");
1443         goto done;
1444     }
1445 
1446     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1447         goto done;
1448     }
1449 
1450     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1451         vring_set_avail_event(vq, vq->last_avail_idx);
1452     }
1453 
1454     i = head;
1455 
1456     caches = vring_get_region_caches(vq);
1457     if (!caches) {
1458         virtio_error(vdev, "Region caches not initialized");
1459         goto done;
1460     }
1461 
1462     if (caches->desc.len < max * sizeof(VRingDesc)) {
1463         virtio_error(vdev, "Cannot map descriptor ring");
1464         goto done;
1465     }
1466 
1467     desc_cache = &caches->desc;
1468     vring_split_desc_read(vdev, &desc, desc_cache, i);
1469     if (desc.flags & VRING_DESC_F_INDIRECT) {
1470         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1471             virtio_error(vdev, "Invalid size for indirect buffer table");
1472             goto done;
1473         }
1474 
1475         /* loop over the indirect descriptor table */
1476         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1477                                        desc.addr, desc.len, false);
1478         desc_cache = &indirect_desc_cache;
1479         if (len < desc.len) {
1480             virtio_error(vdev, "Cannot map indirect buffer");
1481             goto done;
1482         }
1483 
1484         max = desc.len / sizeof(VRingDesc);
1485         i = 0;
1486         vring_split_desc_read(vdev, &desc, desc_cache, i);
1487     }
1488 
1489     /* Collect all the descriptors */
1490     do {
1491         bool map_ok;
1492 
1493         if (desc.flags & VRING_DESC_F_WRITE) {
1494             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1495                                         iov + out_num,
1496                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1497                                         desc.addr, desc.len);
1498         } else {
1499             if (in_num) {
1500                 virtio_error(vdev, "Incorrect order for descriptors");
1501                 goto err_undo_map;
1502             }
1503             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1504                                         VIRTQUEUE_MAX_SIZE, false,
1505                                         desc.addr, desc.len);
1506         }
1507         if (!map_ok) {
1508             goto err_undo_map;
1509         }
1510 
1511         /* If we've got too many, that implies a descriptor loop. */
1512         if (++elem_entries > max) {
1513             virtio_error(vdev, "Looped descriptor");
1514             goto err_undo_map;
1515         }
1516 
1517         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1518     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1519 
1520     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1521         goto err_undo_map;
1522     }
1523 
1524     /* Now copy what we have collected and mapped */
1525     elem = virtqueue_alloc_element(sz, out_num, in_num);
1526     elem->index = head;
1527     elem->ndescs = 1;
1528     for (i = 0; i < out_num; i++) {
1529         elem->out_addr[i] = addr[i];
1530         elem->out_sg[i] = iov[i];
1531     }
1532     for (i = 0; i < in_num; i++) {
1533         elem->in_addr[i] = addr[out_num + i];
1534         elem->in_sg[i] = iov[out_num + i];
1535     }
1536 
1537     vq->inuse++;
1538 
1539     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1540 done:
1541     address_space_cache_destroy(&indirect_desc_cache);
1542 
1543     return elem;
1544 
1545 err_undo_map:
1546     virtqueue_undo_map_desc(out_num, in_num, iov);
1547     goto done;
1548 }
1549 
1550 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1551 {
1552     unsigned int i, max;
1553     VRingMemoryRegionCaches *caches;
1554     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1555     MemoryRegionCache *desc_cache;
1556     int64_t len;
1557     VirtIODevice *vdev = vq->vdev;
1558     VirtQueueElement *elem = NULL;
1559     unsigned out_num, in_num, elem_entries;
1560     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1561     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1562     VRingPackedDesc desc;
1563     uint16_t id;
1564     int rc;
1565 
1566     RCU_READ_LOCK_GUARD();
1567     if (virtio_queue_packed_empty_rcu(vq)) {
1568         goto done;
1569     }
1570 
1571     /* When we start there are none of either input nor output. */
1572     out_num = in_num = elem_entries = 0;
1573 
1574     max = vq->vring.num;
1575 
1576     if (vq->inuse >= vq->vring.num) {
1577         virtio_error(vdev, "Virtqueue size exceeded");
1578         goto done;
1579     }
1580 
1581     i = vq->last_avail_idx;
1582 
1583     caches = vring_get_region_caches(vq);
1584     if (!caches) {
1585         virtio_error(vdev, "Region caches not initialized");
1586         goto done;
1587     }
1588 
1589     if (caches->desc.len < max * sizeof(VRingDesc)) {
1590         virtio_error(vdev, "Cannot map descriptor ring");
1591         goto done;
1592     }
1593 
1594     desc_cache = &caches->desc;
1595     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1596     id = desc.id;
1597     if (desc.flags & VRING_DESC_F_INDIRECT) {
1598         if (desc.len % sizeof(VRingPackedDesc)) {
1599             virtio_error(vdev, "Invalid size for indirect buffer table");
1600             goto done;
1601         }
1602 
1603         /* loop over the indirect descriptor table */
1604         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1605                                        desc.addr, desc.len, false);
1606         desc_cache = &indirect_desc_cache;
1607         if (len < desc.len) {
1608             virtio_error(vdev, "Cannot map indirect buffer");
1609             goto done;
1610         }
1611 
1612         max = desc.len / sizeof(VRingPackedDesc);
1613         i = 0;
1614         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1615     }
1616 
1617     /* Collect all the descriptors */
1618     do {
1619         bool map_ok;
1620 
1621         if (desc.flags & VRING_DESC_F_WRITE) {
1622             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1623                                         iov + out_num,
1624                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1625                                         desc.addr, desc.len);
1626         } else {
1627             if (in_num) {
1628                 virtio_error(vdev, "Incorrect order for descriptors");
1629                 goto err_undo_map;
1630             }
1631             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1632                                         VIRTQUEUE_MAX_SIZE, false,
1633                                         desc.addr, desc.len);
1634         }
1635         if (!map_ok) {
1636             goto err_undo_map;
1637         }
1638 
1639         /* If we've got too many, that implies a descriptor loop. */
1640         if (++elem_entries > max) {
1641             virtio_error(vdev, "Looped descriptor");
1642             goto err_undo_map;
1643         }
1644 
1645         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1646                                              desc_cache ==
1647                                              &indirect_desc_cache);
1648     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1649 
1650     /* Now copy what we have collected and mapped */
1651     elem = virtqueue_alloc_element(sz, out_num, in_num);
1652     for (i = 0; i < out_num; i++) {
1653         elem->out_addr[i] = addr[i];
1654         elem->out_sg[i] = iov[i];
1655     }
1656     for (i = 0; i < in_num; i++) {
1657         elem->in_addr[i] = addr[out_num + i];
1658         elem->in_sg[i] = iov[out_num + i];
1659     }
1660 
1661     elem->index = id;
1662     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1663     vq->last_avail_idx += elem->ndescs;
1664     vq->inuse += elem->ndescs;
1665 
1666     if (vq->last_avail_idx >= vq->vring.num) {
1667         vq->last_avail_idx -= vq->vring.num;
1668         vq->last_avail_wrap_counter ^= 1;
1669     }
1670 
1671     vq->shadow_avail_idx = vq->last_avail_idx;
1672     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1673 
1674     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1675 done:
1676     address_space_cache_destroy(&indirect_desc_cache);
1677 
1678     return elem;
1679 
1680 err_undo_map:
1681     virtqueue_undo_map_desc(out_num, in_num, iov);
1682     goto done;
1683 }
1684 
1685 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1686 {
1687     if (virtio_device_disabled(vq->vdev)) {
1688         return NULL;
1689     }
1690 
1691     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1692         return virtqueue_packed_pop(vq, sz);
1693     } else {
1694         return virtqueue_split_pop(vq, sz);
1695     }
1696 }
1697 
1698 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1699 {
1700     VRingMemoryRegionCaches *caches;
1701     MemoryRegionCache *desc_cache;
1702     unsigned int dropped = 0;
1703     VirtQueueElement elem = {};
1704     VirtIODevice *vdev = vq->vdev;
1705     VRingPackedDesc desc;
1706 
1707     caches = vring_get_region_caches(vq);
1708     if (!caches) {
1709         return 0;
1710     }
1711 
1712     desc_cache = &caches->desc;
1713 
1714     virtio_queue_set_notification(vq, 0);
1715 
1716     while (vq->inuse < vq->vring.num) {
1717         unsigned int idx = vq->last_avail_idx;
1718         /*
1719          * works similar to virtqueue_pop but does not map buffers
1720          * and does not allocate any memory.
1721          */
1722         vring_packed_desc_read(vdev, &desc, desc_cache,
1723                                vq->last_avail_idx , true);
1724         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1725             break;
1726         }
1727         elem.index = desc.id;
1728         elem.ndescs = 1;
1729         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1730                                                vq->vring.num, &idx, false)) {
1731             ++elem.ndescs;
1732         }
1733         /*
1734          * immediately push the element, nothing to unmap
1735          * as both in_num and out_num are set to 0.
1736          */
1737         virtqueue_push(vq, &elem, 0);
1738         dropped++;
1739         vq->last_avail_idx += elem.ndescs;
1740         if (vq->last_avail_idx >= vq->vring.num) {
1741             vq->last_avail_idx -= vq->vring.num;
1742             vq->last_avail_wrap_counter ^= 1;
1743         }
1744     }
1745 
1746     return dropped;
1747 }
1748 
1749 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1750 {
1751     unsigned int dropped = 0;
1752     VirtQueueElement elem = {};
1753     VirtIODevice *vdev = vq->vdev;
1754     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1755 
1756     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1757         /* works similar to virtqueue_pop but does not map buffers
1758         * and does not allocate any memory */
1759         smp_rmb();
1760         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1761             break;
1762         }
1763         vq->inuse++;
1764         vq->last_avail_idx++;
1765         if (fEventIdx) {
1766             vring_set_avail_event(vq, vq->last_avail_idx);
1767         }
1768         /* immediately push the element, nothing to unmap
1769          * as both in_num and out_num are set to 0 */
1770         virtqueue_push(vq, &elem, 0);
1771         dropped++;
1772     }
1773 
1774     return dropped;
1775 }
1776 
1777 /* virtqueue_drop_all:
1778  * @vq: The #VirtQueue
1779  * Drops all queued buffers and indicates them to the guest
1780  * as if they are done. Useful when buffers can not be
1781  * processed but must be returned to the guest.
1782  */
1783 unsigned int virtqueue_drop_all(VirtQueue *vq)
1784 {
1785     struct VirtIODevice *vdev = vq->vdev;
1786 
1787     if (virtio_device_disabled(vq->vdev)) {
1788         return 0;
1789     }
1790 
1791     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1792         return virtqueue_packed_drop_all(vq);
1793     } else {
1794         return virtqueue_split_drop_all(vq);
1795     }
1796 }
1797 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * The whole structure is transferred as one raw buffer of
 * sizeof(VirtQueueElementOld) bytes, so its layout must not change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;                     /* copied to/from elem->index */
    unsigned int out_num;                   /* valid entries in out_addr/out_sg */
    unsigned int in_num;                    /* valid entries in in_addr/in_sg */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];  /* only iov_len is filled in when saving */
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; /* only iov_len is filled in when saving */
} VirtQueueElementOld;
1814 
1815 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1816 {
1817     VirtQueueElement *elem;
1818     VirtQueueElementOld data;
1819     int i;
1820 
1821     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1822 
1823     /* TODO: teach all callers that this can fail, and return failure instead
1824      * of asserting here.
1825      * This is just one thing (there are probably more) that must be
1826      * fixed before we can allow NDEBUG compilation.
1827      */
1828     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1829     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1830 
1831     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1832     elem->index = data.index;
1833 
1834     for (i = 0; i < elem->in_num; i++) {
1835         elem->in_addr[i] = data.in_addr[i];
1836     }
1837 
1838     for (i = 0; i < elem->out_num; i++) {
1839         elem->out_addr[i] = data.out_addr[i];
1840     }
1841 
1842     for (i = 0; i < elem->in_num; i++) {
1843         /* Base is overwritten by virtqueue_map.  */
1844         elem->in_sg[i].iov_base = 0;
1845         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1846     }
1847 
1848     for (i = 0; i < elem->out_num; i++) {
1849         /* Base is overwritten by virtqueue_map.  */
1850         elem->out_sg[i].iov_base = 0;
1851         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1852     }
1853 
1854     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1855         qemu_get_be32s(f, &elem->ndescs);
1856     }
1857 
1858     virtqueue_map(vdev, elem);
1859     return elem;
1860 }
1861 
1862 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1863                                 VirtQueueElement *elem)
1864 {
1865     VirtQueueElementOld data;
1866     int i;
1867 
1868     memset(&data, 0, sizeof(data));
1869     data.index = elem->index;
1870     data.in_num = elem->in_num;
1871     data.out_num = elem->out_num;
1872 
1873     for (i = 0; i < elem->in_num; i++) {
1874         data.in_addr[i] = elem->in_addr[i];
1875     }
1876 
1877     for (i = 0; i < elem->out_num; i++) {
1878         data.out_addr[i] = elem->out_addr[i];
1879     }
1880 
1881     for (i = 0; i < elem->in_num; i++) {
1882         /* Base is overwritten by virtqueue_map when loading.  Do not
1883          * save it, as it would leak the QEMU address space layout.  */
1884         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1885     }
1886 
1887     for (i = 0; i < elem->out_num; i++) {
1888         /* Do not save iov_base as above.  */
1889         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1890     }
1891 
1892     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1893         qemu_put_be32s(f, &elem->ndescs);
1894     }
1895 
1896     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897 }
1898 
1899 /* virtio device */
1900 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1901 {
1902     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1903     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1904 
1905     if (virtio_device_disabled(vdev)) {
1906         return;
1907     }
1908 
1909     if (k->notify) {
1910         k->notify(qbus->parent, vector);
1911     }
1912 }
1913 
1914 void virtio_update_irq(VirtIODevice *vdev)
1915 {
1916     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1917 }
1918 
1919 static int virtio_validate_features(VirtIODevice *vdev)
1920 {
1921     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1922 
1923     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1924         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1925         return -EFAULT;
1926     }
1927 
1928     if (k->validate_features) {
1929         return k->validate_features(vdev);
1930     } else {
1931         return 0;
1932     }
1933 }
1934 
1935 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1936 {
1937     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1938     trace_virtio_set_status(vdev, val);
1939 
1940     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1941         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1942             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1943             int ret = virtio_validate_features(vdev);
1944 
1945             if (ret) {
1946                 return ret;
1947             }
1948         }
1949     }
1950 
1951     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1952         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1953         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1954     }
1955 
1956     if (k->set_status) {
1957         k->set_status(vdev, val);
1958     }
1959     vdev->status = val;
1960 
1961     return 0;
1962 }
1963 
1964 static enum virtio_device_endian virtio_default_endian(void)
1965 {
1966     if (target_words_bigendian()) {
1967         return VIRTIO_DEVICE_ENDIAN_BIG;
1968     } else {
1969         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970     }
1971 }
1972 
1973 static enum virtio_device_endian virtio_current_cpu_endian(void)
1974 {
1975     CPUClass *cc = CPU_GET_CLASS(current_cpu);
1976 
1977     if (cc->virtio_is_big_endian(current_cpu)) {
1978         return VIRTIO_DEVICE_ENDIAN_BIG;
1979     } else {
1980         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1981     }
1982 }
1983 
1984 void virtio_reset(void *opaque)
1985 {
1986     VirtIODevice *vdev = opaque;
1987     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1988     int i;
1989 
1990     virtio_set_status(vdev, 0);
1991     if (current_cpu) {
1992         /* Guest initiated reset */
1993         vdev->device_endian = virtio_current_cpu_endian();
1994     } else {
1995         /* System reset */
1996         vdev->device_endian = virtio_default_endian();
1997     }
1998 
1999     if (k->reset) {
2000         k->reset(vdev);
2001     }
2002 
2003     vdev->start_on_kick = false;
2004     vdev->started = false;
2005     vdev->broken = false;
2006     vdev->guest_features = 0;
2007     vdev->queue_sel = 0;
2008     vdev->status = 0;
2009     vdev->disabled = false;
2010     qatomic_set(&vdev->isr, 0);
2011     vdev->config_vector = VIRTIO_NO_VECTOR;
2012     virtio_notify_vector(vdev, vdev->config_vector);
2013 
2014     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2015         vdev->vq[i].vring.desc = 0;
2016         vdev->vq[i].vring.avail = 0;
2017         vdev->vq[i].vring.used = 0;
2018         vdev->vq[i].last_avail_idx = 0;
2019         vdev->vq[i].shadow_avail_idx = 0;
2020         vdev->vq[i].used_idx = 0;
2021         vdev->vq[i].last_avail_wrap_counter = true;
2022         vdev->vq[i].shadow_avail_wrap_counter = true;
2023         vdev->vq[i].used_wrap_counter = true;
2024         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2025         vdev->vq[i].signalled_used = 0;
2026         vdev->vq[i].signalled_used_valid = false;
2027         vdev->vq[i].notification = true;
2028         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2029         vdev->vq[i].inuse = 0;
2030         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2031     }
2032 }
2033 
2034 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2035 {
2036     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2037     uint8_t val;
2038 
2039     if (addr + sizeof(val) > vdev->config_len) {
2040         return (uint32_t)-1;
2041     }
2042 
2043     k->get_config(vdev, vdev->config);
2044 
2045     val = ldub_p(vdev->config + addr);
2046     return val;
2047 }
2048 
2049 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2050 {
2051     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2052     uint16_t val;
2053 
2054     if (addr + sizeof(val) > vdev->config_len) {
2055         return (uint32_t)-1;
2056     }
2057 
2058     k->get_config(vdev, vdev->config);
2059 
2060     val = lduw_p(vdev->config + addr);
2061     return val;
2062 }
2063 
2064 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2065 {
2066     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2067     uint32_t val;
2068 
2069     if (addr + sizeof(val) > vdev->config_len) {
2070         return (uint32_t)-1;
2071     }
2072 
2073     k->get_config(vdev, vdev->config);
2074 
2075     val = ldl_p(vdev->config + addr);
2076     return val;
2077 }
2078 
2079 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2080 {
2081     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2082     uint8_t val = data;
2083 
2084     if (addr + sizeof(val) > vdev->config_len) {
2085         return;
2086     }
2087 
2088     stb_p(vdev->config + addr, val);
2089 
2090     if (k->set_config) {
2091         k->set_config(vdev, vdev->config);
2092     }
2093 }
2094 
2095 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2096 {
2097     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2098     uint16_t val = data;
2099 
2100     if (addr + sizeof(val) > vdev->config_len) {
2101         return;
2102     }
2103 
2104     stw_p(vdev->config + addr, val);
2105 
2106     if (k->set_config) {
2107         k->set_config(vdev, vdev->config);
2108     }
2109 }
2110 
2111 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2112 {
2113     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2114     uint32_t val = data;
2115 
2116     if (addr + sizeof(val) > vdev->config_len) {
2117         return;
2118     }
2119 
2120     stl_p(vdev->config + addr, val);
2121 
2122     if (k->set_config) {
2123         k->set_config(vdev, vdev->config);
2124     }
2125 }
2126 
2127 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2128 {
2129     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2130     uint8_t val;
2131 
2132     if (addr + sizeof(val) > vdev->config_len) {
2133         return (uint32_t)-1;
2134     }
2135 
2136     k->get_config(vdev, vdev->config);
2137 
2138     val = ldub_p(vdev->config + addr);
2139     return val;
2140 }
2141 
2142 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2143 {
2144     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2145     uint16_t val;
2146 
2147     if (addr + sizeof(val) > vdev->config_len) {
2148         return (uint32_t)-1;
2149     }
2150 
2151     k->get_config(vdev, vdev->config);
2152 
2153     val = lduw_le_p(vdev->config + addr);
2154     return val;
2155 }
2156 
2157 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2158 {
2159     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2160     uint32_t val;
2161 
2162     if (addr + sizeof(val) > vdev->config_len) {
2163         return (uint32_t)-1;
2164     }
2165 
2166     k->get_config(vdev, vdev->config);
2167 
2168     val = ldl_le_p(vdev->config + addr);
2169     return val;
2170 }
2171 
2172 void virtio_config_modern_writeb(VirtIODevice *vdev,
2173                                  uint32_t addr, uint32_t data)
2174 {
2175     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2176     uint8_t val = data;
2177 
2178     if (addr + sizeof(val) > vdev->config_len) {
2179         return;
2180     }
2181 
2182     stb_p(vdev->config + addr, val);
2183 
2184     if (k->set_config) {
2185         k->set_config(vdev, vdev->config);
2186     }
2187 }
2188 
2189 void virtio_config_modern_writew(VirtIODevice *vdev,
2190                                  uint32_t addr, uint32_t data)
2191 {
2192     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2193     uint16_t val = data;
2194 
2195     if (addr + sizeof(val) > vdev->config_len) {
2196         return;
2197     }
2198 
2199     stw_le_p(vdev->config + addr, val);
2200 
2201     if (k->set_config) {
2202         k->set_config(vdev, vdev->config);
2203     }
2204 }
2205 
2206 void virtio_config_modern_writel(VirtIODevice *vdev,
2207                                  uint32_t addr, uint32_t data)
2208 {
2209     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2210     uint32_t val = data;
2211 
2212     if (addr + sizeof(val) > vdev->config_len) {
2213         return;
2214     }
2215 
2216     stl_le_p(vdev->config + addr, val);
2217 
2218     if (k->set_config) {
2219         k->set_config(vdev, vdev->config);
2220     }
2221 }
2222 
2223 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2224 {
2225     if (!vdev->vq[n].vring.num) {
2226         return;
2227     }
2228     vdev->vq[n].vring.desc = addr;
2229     virtio_queue_update_rings(vdev, n);
2230 }
2231 
2232 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2233 {
2234     return vdev->vq[n].vring.desc;
2235 }
2236 
2237 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2238                             hwaddr avail, hwaddr used)
2239 {
2240     if (!vdev->vq[n].vring.num) {
2241         return;
2242     }
2243     vdev->vq[n].vring.desc = desc;
2244     vdev->vq[n].vring.avail = avail;
2245     vdev->vq[n].vring.used = used;
2246     virtio_init_region_cache(vdev, n);
2247 }
2248 
2249 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2250 {
2251     /* Don't allow guest to flip queue between existent and
2252      * nonexistent states, or to set it to an invalid size.
2253      */
2254     if (!!num != !!vdev->vq[n].vring.num ||
2255         num > VIRTQUEUE_MAX_SIZE ||
2256         num < 0) {
2257         return;
2258     }
2259     vdev->vq[n].vring.num = num;
2260 }
2261 
2262 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2263 {
2264     return QLIST_FIRST(&vdev->vector_queues[vector]);
2265 }
2266 
2267 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2268 {
2269     return QLIST_NEXT(vq, node);
2270 }
2271 
2272 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2273 {
2274     return vdev->vq[n].vring.num;
2275 }
2276 
2277 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2278 {
2279     return vdev->vq[n].vring.num_default;
2280 }
2281 
2282 int virtio_get_num_queues(VirtIODevice *vdev)
2283 {
2284     int i;
2285 
2286     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2287         if (!virtio_queue_get_num(vdev, i)) {
2288             break;
2289         }
2290     }
2291 
2292     return i;
2293 }
2294 
2295 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2296 {
2297     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2298     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2299 
2300     /* virtio-1 compliant devices cannot change the alignment */
2301     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2302         error_report("tried to modify queue alignment for virtio-1 device");
2303         return;
2304     }
2305     /* Check that the transport told us it was going to do this
2306      * (so a buggy transport will immediately assert rather than
2307      * silently failing to migrate this state)
2308      */
2309     assert(k->has_variable_vring_alignment);
2310 
2311     if (align) {
2312         vdev->vq[n].vring.align = align;
2313         virtio_queue_update_rings(vdev, n);
2314     }
2315 }
2316 
2317 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2318 {
2319     bool ret = false;
2320 
2321     if (vq->vring.desc && vq->handle_aio_output) {
2322         VirtIODevice *vdev = vq->vdev;
2323 
2324         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2325         ret = vq->handle_aio_output(vdev, vq);
2326 
2327         if (unlikely(vdev->start_on_kick)) {
2328             virtio_set_started(vdev, true);
2329         }
2330     }
2331 
2332     return ret;
2333 }
2334 
2335 static void virtio_queue_notify_vq(VirtQueue *vq)
2336 {
2337     if (vq->vring.desc && vq->handle_output) {
2338         VirtIODevice *vdev = vq->vdev;
2339 
2340         if (unlikely(vdev->broken)) {
2341             return;
2342         }
2343 
2344         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2345         vq->handle_output(vdev, vq);
2346 
2347         if (unlikely(vdev->start_on_kick)) {
2348             virtio_set_started(vdev, true);
2349         }
2350     }
2351 }
2352 
2353 void virtio_queue_notify(VirtIODevice *vdev, int n)
2354 {
2355     VirtQueue *vq = &vdev->vq[n];
2356 
2357     if (unlikely(!vq->vring.desc || vdev->broken)) {
2358         return;
2359     }
2360 
2361     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2362     if (vq->host_notifier_enabled) {
2363         event_notifier_set(&vq->host_notifier);
2364     } else if (vq->handle_output) {
2365         vq->handle_output(vdev, vq);
2366 
2367         if (unlikely(vdev->start_on_kick)) {
2368             virtio_set_started(vdev, true);
2369         }
2370     }
2371 }
2372 
2373 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2374 {
2375     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2376         VIRTIO_NO_VECTOR;
2377 }
2378 
2379 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2380 {
2381     VirtQueue *vq = &vdev->vq[n];
2382 
2383     if (n < VIRTIO_QUEUE_MAX) {
2384         if (vdev->vector_queues &&
2385             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2386             QLIST_REMOVE(vq, node);
2387         }
2388         vdev->vq[n].vector = vector;
2389         if (vdev->vector_queues &&
2390             vector != VIRTIO_NO_VECTOR) {
2391             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2392         }
2393     }
2394 }
2395 
2396 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2397                             VirtIOHandleOutput handle_output)
2398 {
2399     int i;
2400 
2401     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2402         if (vdev->vq[i].vring.num == 0)
2403             break;
2404     }
2405 
2406     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2407         abort();
2408 
2409     vdev->vq[i].vring.num = queue_size;
2410     vdev->vq[i].vring.num_default = queue_size;
2411     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2412     vdev->vq[i].handle_output = handle_output;
2413     vdev->vq[i].handle_aio_output = NULL;
2414     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2415                                        queue_size);
2416 
2417     return &vdev->vq[i];
2418 }
2419 
2420 void virtio_delete_queue(VirtQueue *vq)
2421 {
2422     vq->vring.num = 0;
2423     vq->vring.num_default = 0;
2424     vq->handle_output = NULL;
2425     vq->handle_aio_output = NULL;
2426     g_free(vq->used_elems);
2427     vq->used_elems = NULL;
2428     virtio_virtqueue_reset_region_cache(vq);
2429 }
2430 
2431 void virtio_del_queue(VirtIODevice *vdev, int n)
2432 {
2433     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2434         abort();
2435     }
2436 
2437     virtio_delete_queue(&vdev->vq[n]);
2438 }
2439 
2440 static void virtio_set_isr(VirtIODevice *vdev, int value)
2441 {
2442     uint8_t old = qatomic_read(&vdev->isr);
2443 
2444     /* Do not write ISR if it does not change, so that its cacheline remains
2445      * shared in the common case where the guest does not read it.
2446      */
2447     if ((old & value) != value) {
2448         qatomic_or(&vdev->isr, value);
2449     }
2450 }
2451 
2452 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2453 {
2454     uint16_t old, new;
2455     bool v;
2456     /* We need to expose used array entries before checking used event. */
2457     smp_mb();
2458     /* Always notify when queue is empty (when feature acknowledge) */
2459     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2460         !vq->inuse && virtio_queue_empty(vq)) {
2461         return true;
2462     }
2463 
2464     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2465         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2466     }
2467 
2468     v = vq->signalled_used_valid;
2469     vq->signalled_used_valid = true;
2470     old = vq->signalled_used;
2471     new = vq->signalled_used = vq->used_idx;
2472     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2473 }
2474 
2475 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2476                                     uint16_t off_wrap, uint16_t new,
2477                                     uint16_t old)
2478 {
2479     int off = off_wrap & ~(1 << 15);
2480 
2481     if (wrap != off_wrap >> 15) {
2482         off -= vq->vring.num;
2483     }
2484 
2485     return vring_need_event(off, new, old);
2486 }
2487 
2488 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2489 {
2490     VRingPackedDescEvent e;
2491     uint16_t old, new;
2492     bool v;
2493     VRingMemoryRegionCaches *caches;
2494 
2495     caches = vring_get_region_caches(vq);
2496     if (!caches) {
2497         return false;
2498     }
2499 
2500     vring_packed_event_read(vdev, &caches->avail, &e);
2501 
2502     old = vq->signalled_used;
2503     new = vq->signalled_used = vq->used_idx;
2504     v = vq->signalled_used_valid;
2505     vq->signalled_used_valid = true;
2506 
2507     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2508         return false;
2509     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2510         return true;
2511     }
2512 
2513     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2514                                          e.off_wrap, new, old);
2515 }
2516 
2517 /* Called within rcu_read_lock().  */
2518 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2519 {
2520     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2521         return virtio_packed_should_notify(vdev, vq);
2522     } else {
2523         return virtio_split_should_notify(vdev, vq);
2524     }
2525 }
2526 
2527 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2528 {
2529     WITH_RCU_READ_LOCK_GUARD() {
2530         if (!virtio_should_notify(vdev, vq)) {
2531             return;
2532         }
2533     }
2534 
2535     trace_virtio_notify_irqfd(vdev, vq);
2536 
2537     /*
2538      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2539      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2540      * incorrectly polling this bit during crashdump and hibernation
2541      * in MSI mode, causing a hang if this bit is never updated.
2542      * Recent releases of Windows do not really shut down, but rather
2543      * log out and hibernate to make the next startup faster.  Hence,
2544      * this manifested as a more serious hang during shutdown with
2545      *
2546      * Next driver release from 2016 fixed this problem, so working around it
2547      * is not a must, but it's easy to do so let's do it here.
2548      *
2549      * Note: it's safe to update ISR from any thread as it was switched
2550      * to an atomic operation.
2551      */
2552     virtio_set_isr(vq->vdev, 0x1);
2553     event_notifier_set(&vq->guest_notifier);
2554 }
2555 
2556 static void virtio_irq(VirtQueue *vq)
2557 {
2558     virtio_set_isr(vq->vdev, 0x1);
2559     virtio_notify_vector(vq->vdev, vq->vector);
2560 }
2561 
2562 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2563 {
2564     WITH_RCU_READ_LOCK_GUARD() {
2565         if (!virtio_should_notify(vdev, vq)) {
2566             return;
2567         }
2568     }
2569 
2570     trace_virtio_notify(vdev, vq);
2571     virtio_irq(vq);
2572 }
2573 
2574 void virtio_notify_config(VirtIODevice *vdev)
2575 {
2576     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2577         return;
2578 
2579     virtio_set_isr(vdev, 0x3);
2580     vdev->generation++;
2581     virtio_notify_vector(vdev, vdev->config_vector);
2582 }
2583 
2584 static bool virtio_device_endian_needed(void *opaque)
2585 {
2586     VirtIODevice *vdev = opaque;
2587 
2588     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2589     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2590         return vdev->device_endian != virtio_default_endian();
2591     }
2592     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2593     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2594 }
2595 
2596 static bool virtio_64bit_features_needed(void *opaque)
2597 {
2598     VirtIODevice *vdev = opaque;
2599 
2600     return (vdev->host_features >> 32) != 0;
2601 }
2602 
2603 static bool virtio_virtqueue_needed(void *opaque)
2604 {
2605     VirtIODevice *vdev = opaque;
2606 
2607     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2608 }
2609 
2610 static bool virtio_packed_virtqueue_needed(void *opaque)
2611 {
2612     VirtIODevice *vdev = opaque;
2613 
2614     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2615 }
2616 
2617 static bool virtio_ringsize_needed(void *opaque)
2618 {
2619     VirtIODevice *vdev = opaque;
2620     int i;
2621 
2622     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2623         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2624             return true;
2625         }
2626     }
2627     return false;
2628 }
2629 
2630 static bool virtio_extra_state_needed(void *opaque)
2631 {
2632     VirtIODevice *vdev = opaque;
2633     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2634     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2635 
2636     return k->has_extra_state &&
2637         k->has_extra_state(qbus->parent);
2638 }
2639 
2640 static bool virtio_broken_needed(void *opaque)
2641 {
2642     VirtIODevice *vdev = opaque;
2643 
2644     return vdev->broken;
2645 }
2646 
2647 static bool virtio_started_needed(void *opaque)
2648 {
2649     VirtIODevice *vdev = opaque;
2650 
2651     return vdev->started;
2652 }
2653 
2654 static bool virtio_disabled_needed(void *opaque)
2655 {
2656     VirtIODevice *vdev = opaque;
2657 
2658     return vdev->disabled;
2659 }
2660 
2661 static const VMStateDescription vmstate_virtqueue = {
2662     .name = "virtqueue_state",
2663     .version_id = 1,
2664     .minimum_version_id = 1,
2665     .fields = (VMStateField[]) {
2666         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2667         VMSTATE_UINT64(vring.used, struct VirtQueue),
2668         VMSTATE_END_OF_LIST()
2669     }
2670 };
2671 
2672 static const VMStateDescription vmstate_packed_virtqueue = {
2673     .name = "packed_virtqueue_state",
2674     .version_id = 1,
2675     .minimum_version_id = 1,
2676     .fields = (VMStateField[]) {
2677         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2678         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2679         VMSTATE_UINT16(used_idx, struct VirtQueue),
2680         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2681         VMSTATE_UINT32(inuse, struct VirtQueue),
2682         VMSTATE_END_OF_LIST()
2683     }
2684 };
2685 
2686 static const VMStateDescription vmstate_virtio_virtqueues = {
2687     .name = "virtio/virtqueues",
2688     .version_id = 1,
2689     .minimum_version_id = 1,
2690     .needed = &virtio_virtqueue_needed,
2691     .fields = (VMStateField[]) {
2692         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2693                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2694         VMSTATE_END_OF_LIST()
2695     }
2696 };
2697 
2698 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2699     .name = "virtio/packed_virtqueues",
2700     .version_id = 1,
2701     .minimum_version_id = 1,
2702     .needed = &virtio_packed_virtqueue_needed,
2703     .fields = (VMStateField[]) {
2704         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2705                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2706         VMSTATE_END_OF_LIST()
2707     }
2708 };
2709 
2710 static const VMStateDescription vmstate_ringsize = {
2711     .name = "ringsize_state",
2712     .version_id = 1,
2713     .minimum_version_id = 1,
2714     .fields = (VMStateField[]) {
2715         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2716         VMSTATE_END_OF_LIST()
2717     }
2718 };
2719 
2720 static const VMStateDescription vmstate_virtio_ringsize = {
2721     .name = "virtio/ringsize",
2722     .version_id = 1,
2723     .minimum_version_id = 1,
2724     .needed = &virtio_ringsize_needed,
2725     .fields = (VMStateField[]) {
2726         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2727                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2728         VMSTATE_END_OF_LIST()
2729     }
2730 };
2731 
2732 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2733                            const VMStateField *field)
2734 {
2735     VirtIODevice *vdev = pv;
2736     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2737     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2738 
2739     if (!k->load_extra_state) {
2740         return -1;
2741     } else {
2742         return k->load_extra_state(qbus->parent, f);
2743     }
2744 }
2745 
2746 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2747                            const VMStateField *field, QJSON *vmdesc)
2748 {
2749     VirtIODevice *vdev = pv;
2750     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2751     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2752 
2753     k->save_extra_state(qbus->parent, f);
2754     return 0;
2755 }
2756 
2757 static const VMStateInfo vmstate_info_extra_state = {
2758     .name = "virtqueue_extra_state",
2759     .get = get_extra_state,
2760     .put = put_extra_state,
2761 };
2762 
2763 static const VMStateDescription vmstate_virtio_extra_state = {
2764     .name = "virtio/extra_state",
2765     .version_id = 1,
2766     .minimum_version_id = 1,
2767     .needed = &virtio_extra_state_needed,
2768     .fields = (VMStateField[]) {
2769         {
2770             .name         = "extra_state",
2771             .version_id   = 0,
2772             .field_exists = NULL,
2773             .size         = 0,
2774             .info         = &vmstate_info_extra_state,
2775             .flags        = VMS_SINGLE,
2776             .offset       = 0,
2777         },
2778         VMSTATE_END_OF_LIST()
2779     }
2780 };
2781 
2782 static const VMStateDescription vmstate_virtio_device_endian = {
2783     .name = "virtio/device_endian",
2784     .version_id = 1,
2785     .minimum_version_id = 1,
2786     .needed = &virtio_device_endian_needed,
2787     .fields = (VMStateField[]) {
2788         VMSTATE_UINT8(device_endian, VirtIODevice),
2789         VMSTATE_END_OF_LIST()
2790     }
2791 };
2792 
2793 static const VMStateDescription vmstate_virtio_64bit_features = {
2794     .name = "virtio/64bit_features",
2795     .version_id = 1,
2796     .minimum_version_id = 1,
2797     .needed = &virtio_64bit_features_needed,
2798     .fields = (VMStateField[]) {
2799         VMSTATE_UINT64(guest_features, VirtIODevice),
2800         VMSTATE_END_OF_LIST()
2801     }
2802 };
2803 
2804 static const VMStateDescription vmstate_virtio_broken = {
2805     .name = "virtio/broken",
2806     .version_id = 1,
2807     .minimum_version_id = 1,
2808     .needed = &virtio_broken_needed,
2809     .fields = (VMStateField[]) {
2810         VMSTATE_BOOL(broken, VirtIODevice),
2811         VMSTATE_END_OF_LIST()
2812     }
2813 };
2814 
2815 static const VMStateDescription vmstate_virtio_started = {
2816     .name = "virtio/started",
2817     .version_id = 1,
2818     .minimum_version_id = 1,
2819     .needed = &virtio_started_needed,
2820     .fields = (VMStateField[]) {
2821         VMSTATE_BOOL(started, VirtIODevice),
2822         VMSTATE_END_OF_LIST()
2823     }
2824 };
2825 
2826 static const VMStateDescription vmstate_virtio_disabled = {
2827     .name = "virtio/disabled",
2828     .version_id = 1,
2829     .minimum_version_id = 1,
2830     .needed = &virtio_disabled_needed,
2831     .fields = (VMStateField[]) {
2832         VMSTATE_BOOL(disabled, VirtIODevice),
2833         VMSTATE_END_OF_LIST()
2834     }
2835 };
2836 
2837 static const VMStateDescription vmstate_virtio = {
2838     .name = "virtio",
2839     .version_id = 1,
2840     .minimum_version_id = 1,
2841     .minimum_version_id_old = 1,
2842     .fields = (VMStateField[]) {
2843         VMSTATE_END_OF_LIST()
2844     },
2845     .subsections = (const VMStateDescription*[]) {
2846         &vmstate_virtio_device_endian,
2847         &vmstate_virtio_64bit_features,
2848         &vmstate_virtio_virtqueues,
2849         &vmstate_virtio_ringsize,
2850         &vmstate_virtio_broken,
2851         &vmstate_virtio_extra_state,
2852         &vmstate_virtio_started,
2853         &vmstate_virtio_packed_virtqueues,
2854         &vmstate_virtio_disabled,
2855         NULL
2856     }
2857 };
2858 
2859 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2860 {
2861     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2862     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2863     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2864     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2865     int i;
2866 
2867     if (k->save_config) {
2868         k->save_config(qbus->parent, f);
2869     }
2870 
2871     qemu_put_8s(f, &vdev->status);
2872     qemu_put_8s(f, &vdev->isr);
2873     qemu_put_be16s(f, &vdev->queue_sel);
2874     qemu_put_be32s(f, &guest_features_lo);
2875     qemu_put_be32(f, vdev->config_len);
2876     qemu_put_buffer(f, vdev->config, vdev->config_len);
2877 
2878     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2879         if (vdev->vq[i].vring.num == 0)
2880             break;
2881     }
2882 
2883     qemu_put_be32(f, i);
2884 
2885     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2886         if (vdev->vq[i].vring.num == 0)
2887             break;
2888 
2889         qemu_put_be32(f, vdev->vq[i].vring.num);
2890         if (k->has_variable_vring_alignment) {
2891             qemu_put_be32(f, vdev->vq[i].vring.align);
2892         }
2893         /*
2894          * Save desc now, the rest of the ring addresses are saved in
2895          * subsections for VIRTIO-1 devices.
2896          */
2897         qemu_put_be64(f, vdev->vq[i].vring.desc);
2898         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2899         if (k->save_queue) {
2900             k->save_queue(qbus->parent, i, f);
2901         }
2902     }
2903 
2904     if (vdc->save != NULL) {
2905         vdc->save(vdev, f);
2906     }
2907 
2908     if (vdc->vmsd) {
2909         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2910         if (ret) {
2911             return ret;
2912         }
2913     }
2914 
2915     /* Subsections */
2916     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2917 }
2918 
2919 /* A wrapper for use as a VMState .put function */
2920 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2921                               const VMStateField *field, QJSON *vmdesc)
2922 {
2923     return virtio_save(VIRTIO_DEVICE(opaque), f);
2924 }
2925 
2926 /* A wrapper for use as a VMState .get function */
2927 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2928                              const VMStateField *field)
2929 {
2930     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2931     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2932 
2933     return virtio_load(vdev, f, dc->vmsd->version_id);
2934 }
2935 
2936 const VMStateInfo  virtio_vmstate_info = {
2937     .name = "virtio",
2938     .get = virtio_device_get,
2939     .put = virtio_device_put,
2940 };
2941 
2942 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2943 {
2944     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2945     bool bad = (val & ~(vdev->host_features)) != 0;
2946 
2947     val &= vdev->host_features;
2948     if (k->set_features) {
2949         k->set_features(vdev, val);
2950     }
2951     vdev->guest_features = val;
2952     return bad ? -1 : 0;
2953 }
2954 
2955 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2956 {
2957     int ret;
2958     /*
2959      * The driver must not attempt to set features after feature negotiation
2960      * has finished.
2961      */
2962     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2963         return -EINVAL;
2964     }
2965     ret = virtio_set_features_nocheck(vdev, val);
2966     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2967         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2968         int i;
2969         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2970             if (vdev->vq[i].vring.num != 0) {
2971                 virtio_init_region_cache(vdev, i);
2972             }
2973         }
2974     }
2975     if (!ret) {
2976         if (!virtio_device_started(vdev, vdev->status) &&
2977             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2978             vdev->start_on_kick = true;
2979         }
2980     }
2981     return ret;
2982 }
2983 
2984 size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2985                                       uint64_t host_features)
2986 {
2987     size_t config_size = 0;
2988     int i;
2989 
2990     for (i = 0; feature_sizes[i].flags != 0; i++) {
2991         if (host_features & feature_sizes[i].flags) {
2992             config_size = MAX(feature_sizes[i].end, config_size);
2993         }
2994     }
2995 
2996     return config_size;
2997 }
2998 
/*
 * Restore the state of @vdev from the migration stream @f.
 *
 * The stream is consumed in this order: transport config (if the bus
 * provides load_config), common registers, config space, per-queue state,
 * device-specific state (vdc->load or vdc->vmsd), and the vmstate_virtio
 * description.  Afterwards the loaded features are checked against
 * host_features and the rings of every queue with a descriptor address are
 * set up and sanity-checked.
 *
 * @version_id is passed on to vdc->load and to the device vmsd load.
 *
 * Returns 0 on success; -1 or the non-zero value of a failing callee
 * otherwise.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    /* Reject a selected-queue index beyond the queue array. */
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    /*
     * NOTE(review): config_len is a signed 32-bit value taken from the
     * stream and a negative value is not rejected here - confirm how the
     * comparisons against vdev->config_len below behave in that case.
     */
    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    /* Per-queue state, in the same order virtio_save() wrote it. */
    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        /* A queue without a descriptor address must have index 0. */
        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    /* No subsection set the endianness: fall back to the default. */
    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features_nocheck to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        /* Only the low 32 feature bits read earlier are applied. */
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    if (!virtio_device_started(vdev, vdev->status) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->start_on_kick = true;
    }

    /* The guard stays in effect until the function returns. */
    RCU_READ_LOCK_GUARD();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up.  Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            /* Packed rings skip the split-ring index checks below. */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
                vdev->vq[i].shadow_avail_wrap_counter =
                                        vdev->vq[i].last_avail_wrap_counter;
                continue;
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    if (vdc->post_load) {
        ret = vdc->post_load(vdev);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
3201 
3202 void virtio_cleanup(VirtIODevice *vdev)
3203 {
3204     qemu_del_vm_change_state_handler(vdev->vmstate);
3205 }
3206 
3207 static void virtio_vmstate_change(void *opaque, int running, RunState state)
3208 {
3209     VirtIODevice *vdev = opaque;
3210     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3211     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3212     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3213     vdev->vm_running = running;
3214 
3215     if (backend_run) {
3216         virtio_set_status(vdev, vdev->status);
3217     }
3218 
3219     if (k->vmstate_change) {
3220         k->vmstate_change(qbus->parent, backend_run);
3221     }
3222 
3223     if (!backend_run) {
3224         virtio_set_status(vdev, vdev->status);
3225     }
3226 }
3227 
3228 void virtio_instance_init_common(Object *proxy_obj, void *data,
3229                                  size_t vdev_size, const char *vdev_name)
3230 {
3231     DeviceState *vdev = data;
3232 
3233     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3234                                        vdev_size, vdev_name, &error_abort,
3235                                        NULL);
3236     qdev_alias_all_properties(vdev, proxy_obj);
3237 }
3238 
3239 void virtio_init(VirtIODevice *vdev, const char *name,
3240                  uint16_t device_id, size_t config_size)
3241 {
3242     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3243     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3244     int i;
3245     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3246 
3247     if (nvectors) {
3248         vdev->vector_queues =
3249             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3250     }
3251 
3252     vdev->start_on_kick = false;
3253     vdev->started = false;
3254     vdev->device_id = device_id;
3255     vdev->status = 0;
3256     qatomic_set(&vdev->isr, 0);
3257     vdev->queue_sel = 0;
3258     vdev->config_vector = VIRTIO_NO_VECTOR;
3259     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3260     vdev->vm_running = runstate_is_running();
3261     vdev->broken = false;
3262     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3263         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3264         vdev->vq[i].vdev = vdev;
3265         vdev->vq[i].queue_index = i;
3266         vdev->vq[i].host_notifier_enabled = false;
3267     }
3268 
3269     vdev->name = name;
3270     vdev->config_len = config_size;
3271     if (vdev->config_len) {
3272         vdev->config = g_malloc0(config_size);
3273     } else {
3274         vdev->config = NULL;
3275     }
3276     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3277             virtio_vmstate_change, vdev);
3278     vdev->device_endian = virtio_default_endian();
3279     vdev->use_guest_notifier_mask = true;
3280 }
3281 
3282 /*
3283  * Only devices that have already been around prior to defining the virtio
3284  * standard support legacy mode; this includes devices not specified in the
3285  * standard. All newer devices conform to the virtio standard only.
3286  */
3287 bool virtio_legacy_allowed(VirtIODevice *vdev)
3288 {
3289     switch (vdev->device_id) {
3290     case VIRTIO_ID_NET:
3291     case VIRTIO_ID_BLOCK:
3292     case VIRTIO_ID_CONSOLE:
3293     case VIRTIO_ID_RNG:
3294     case VIRTIO_ID_BALLOON:
3295     case VIRTIO_ID_RPMSG:
3296     case VIRTIO_ID_SCSI:
3297     case VIRTIO_ID_9P:
3298     case VIRTIO_ID_RPROC_SERIAL:
3299     case VIRTIO_ID_CAIF:
3300         return true;
3301     default:
3302         return false;
3303     }
3304 }
3305 
3306 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3307 {
3308     return vdev->disable_legacy_check;
3309 }
3310 
3311 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3312 {
3313     return vdev->vq[n].vring.desc;
3314 }
3315 
3316 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3317 {
3318     return virtio_queue_get_desc_addr(vdev, n) != 0;
3319 }
3320 
3321 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3322 {
3323     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3324     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3325 
3326     if (k->queue_enabled) {
3327         return k->queue_enabled(qbus->parent, n);
3328     }
3329     return virtio_queue_enabled_legacy(vdev, n);
3330 }
3331 
3332 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3333 {
3334     return vdev->vq[n].vring.avail;
3335 }
3336 
3337 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3338 {
3339     return vdev->vq[n].vring.used;
3340 }
3341 
3342 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3343 {
3344     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3345 }
3346 
3347 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3348 {
3349     int s;
3350 
3351     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3352         return sizeof(struct VRingPackedDescEvent);
3353     }
3354 
3355     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3356     return offsetof(VRingAvail, ring) +
3357         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3358 }
3359 
3360 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3361 {
3362     int s;
3363 
3364     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3365         return sizeof(struct VRingPackedDescEvent);
3366     }
3367 
3368     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3369     return offsetof(VRingUsed, ring) +
3370         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3371 }
3372 
3373 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3374                                                            int n)
3375 {
3376     unsigned int avail, used;
3377 
3378     avail = vdev->vq[n].last_avail_idx;
3379     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3380 
3381     used = vdev->vq[n].used_idx;
3382     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3383 
3384     return avail | used << 16;
3385 }
3386 
3387 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3388                                                       int n)
3389 {
3390     return vdev->vq[n].last_avail_idx;
3391 }
3392 
3393 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3394 {
3395     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3396         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3397     } else {
3398         return virtio_queue_split_get_last_avail_idx(vdev, n);
3399     }
3400 }
3401 
3402 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3403                                                    int n, unsigned int idx)
3404 {
3405     struct VirtQueue *vq = &vdev->vq[n];
3406 
3407     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3408     vq->last_avail_wrap_counter =
3409         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3410     idx >>= 16;
3411     vq->used_idx = idx & 0x7ffff;
3412     vq->used_wrap_counter = !!(idx & 0x8000);
3413 }
3414 
3415 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3416                                                   int n, unsigned int idx)
3417 {
3418         vdev->vq[n].last_avail_idx = idx;
3419         vdev->vq[n].shadow_avail_idx = idx;
3420 }
3421 
3422 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3423                                      unsigned int idx)
3424 {
3425     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3426         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3427     } else {
3428         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3429     }
3430 }
3431 
3432 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3433                                                        int n)
3434 {
3435     /* We don't have a reference like avail idx in shared memory */
3436     return;
3437 }
3438 
3439 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3440                                                       int n)
3441 {
3442     RCU_READ_LOCK_GUARD();
3443     if (vdev->vq[n].vring.desc) {
3444         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3445         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3446     }
3447 }
3448 
3449 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3450 {
3451     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3452         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3453     } else {
3454         virtio_queue_split_restore_last_avail_idx(vdev, n);
3455     }
3456 }
3457 
3458 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3459 {
3460     /* used idx was updated through set_last_avail_idx() */
3461     return;
3462 }
3463 
3464 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3465 {
3466     RCU_READ_LOCK_GUARD();
3467     if (vdev->vq[n].vring.desc) {
3468         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3469     }
3470 }
3471 
3472 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3473 {
3474     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3475         return virtio_queue_packed_update_used_idx(vdev, n);
3476     } else {
3477         return virtio_split_packed_update_used_idx(vdev, n);
3478     }
3479 }
3480 
3481 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3482 {
3483     vdev->vq[n].signalled_used_valid = false;
3484 }
3485 
3486 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3487 {
3488     return vdev->vq + n;
3489 }
3490 
3491 uint16_t virtio_get_queue_index(VirtQueue *vq)
3492 {
3493     return vq->queue_index;
3494 }
3495 
3496 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3497 {
3498     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3499     if (event_notifier_test_and_clear(n)) {
3500         virtio_irq(vq);
3501     }
3502 }
3503 
3504 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3505                                                 bool with_irqfd)
3506 {
3507     if (assign && !with_irqfd) {
3508         event_notifier_set_handler(&vq->guest_notifier,
3509                                    virtio_queue_guest_notifier_read);
3510     } else {
3511         event_notifier_set_handler(&vq->guest_notifier, NULL);
3512     }
3513     if (!assign) {
3514         /* Test and clear notifier before closing it,
3515          * in case poll callback didn't have time to run. */
3516         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3517     }
3518 }
3519 
3520 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3521 {
3522     return &vq->guest_notifier;
3523 }
3524 
3525 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3526 {
3527     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3528     if (event_notifier_test_and_clear(n)) {
3529         virtio_queue_notify_aio_vq(vq);
3530     }
3531 }
3532 
3533 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3534 {
3535     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3536 
3537     virtio_queue_set_notification(vq, 0);
3538 }
3539 
3540 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3541 {
3542     EventNotifier *n = opaque;
3543     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3544 
3545     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3546         return false;
3547     }
3548 
3549     return virtio_queue_notify_aio_vq(vq);
3550 }
3551 
3552 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3553 {
3554     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3555 
3556     /* Caller polls once more after this to catch requests that race with us */
3557     virtio_queue_set_notification(vq, 1);
3558 }
3559 
3560 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3561                                                 VirtIOHandleAIOOutput handle_output)
3562 {
3563     if (handle_output) {
3564         vq->handle_aio_output = handle_output;
3565         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3566                                virtio_queue_host_notifier_aio_read,
3567                                virtio_queue_host_notifier_aio_poll);
3568         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3569                                     virtio_queue_host_notifier_aio_poll_begin,
3570                                     virtio_queue_host_notifier_aio_poll_end);
3571     } else {
3572         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3573         /* Test and clear notifier before after disabling event,
3574          * in case poll callback didn't have time to run. */
3575         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3576         vq->handle_aio_output = NULL;
3577     }
3578 }
3579 
3580 void virtio_queue_host_notifier_read(EventNotifier *n)
3581 {
3582     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3583     if (event_notifier_test_and_clear(n)) {
3584         virtio_queue_notify_vq(vq);
3585     }
3586 }
3587 
3588 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3589 {
3590     return &vq->host_notifier;
3591 }
3592 
3593 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3594 {
3595     vq->host_notifier_enabled = enabled;
3596 }
3597 
3598 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3599                                       MemoryRegion *mr, bool assign)
3600 {
3601     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3602     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3603 
3604     if (k->set_host_notifier_mr) {
3605         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3606     }
3607 
3608     return -1;
3609 }
3610 
3611 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3612 {
3613     g_free(vdev->bus_name);
3614     vdev->bus_name = g_strdup(bus_name);
3615 }
3616 
3617 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3618 {
3619     va_list ap;
3620 
3621     va_start(ap, fmt);
3622     error_vreport(fmt, ap);
3623     va_end(ap);
3624 
3625     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3626         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3627         virtio_notify_config(vdev);
3628     }
3629 
3630     vdev->broken = true;
3631 }
3632 
3633 static void virtio_memory_listener_commit(MemoryListener *listener)
3634 {
3635     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3636     int i;
3637 
3638     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3639         if (vdev->vq[i].vring.num == 0) {
3640             break;
3641         }
3642         virtio_init_region_cache(vdev, i);
3643     }
3644 }
3645 
3646 static void virtio_device_realize(DeviceState *dev, Error **errp)
3647 {
3648     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3649     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3650     Error *err = NULL;
3651 
3652     /* Devices should either use vmsd or the load/save methods */
3653     assert(!vdc->vmsd || !vdc->load);
3654 
3655     if (vdc->realize != NULL) {
3656         vdc->realize(dev, &err);
3657         if (err != NULL) {
3658             error_propagate(errp, err);
3659             return;
3660         }
3661     }
3662 
3663     virtio_bus_device_plugged(vdev, &err);
3664     if (err != NULL) {
3665         error_propagate(errp, err);
3666         vdc->unrealize(dev);
3667         return;
3668     }
3669 
3670     vdev->listener.commit = virtio_memory_listener_commit;
3671     memory_listener_register(&vdev->listener, vdev->dma_as);
3672 }
3673 
3674 static void virtio_device_unrealize(DeviceState *dev)
3675 {
3676     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3677     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3678 
3679     virtio_bus_device_unplugged(vdev);
3680 
3681     if (vdc->unrealize != NULL) {
3682         vdc->unrealize(dev);
3683     }
3684 
3685     g_free(vdev->bus_name);
3686     vdev->bus_name = NULL;
3687 }
3688 
3689 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3690 {
3691     int i;
3692     if (!vdev->vq) {
3693         return;
3694     }
3695 
3696     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3697         if (vdev->vq[i].vring.num == 0) {
3698             break;
3699         }
3700         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3701     }
3702     g_free(vdev->vq);
3703 }
3704 
3705 static void virtio_device_instance_finalize(Object *obj)
3706 {
3707     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3708 
3709     memory_listener_unregister(&vdev->listener);
3710     virtio_device_free_virtqueues(vdev);
3711 
3712     g_free(vdev->config);
3713     g_free(vdev->vector_queues);
3714 }
3715 
/*
 * qdev properties installed on the virtio device class by
 * virtio_device_class_init().
 */
static Property virtio_properties[] = {
    /* Common feature properties, backed by the host_features field. */
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    /* Boolean properties; the last argument is the default value. */
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
3724 
3725 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3726 {
3727     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3728     int i, n, r, err;
3729 
3730     memory_region_transaction_begin();
3731     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3732         VirtQueue *vq = &vdev->vq[n];
3733         if (!virtio_queue_get_num(vdev, n)) {
3734             continue;
3735         }
3736         r = virtio_bus_set_host_notifier(qbus, n, true);
3737         if (r < 0) {
3738             err = r;
3739             goto assign_error;
3740         }
3741         event_notifier_set_handler(&vq->host_notifier,
3742                                    virtio_queue_host_notifier_read);
3743     }
3744 
3745     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3746         /* Kick right away to begin processing requests already in vring */
3747         VirtQueue *vq = &vdev->vq[n];
3748         if (!vq->vring.num) {
3749             continue;
3750         }
3751         event_notifier_set(&vq->host_notifier);
3752     }
3753     memory_region_transaction_commit();
3754     return 0;
3755 
3756 assign_error:
3757     i = n; /* save n for a second iteration after transaction is committed. */
3758     while (--n >= 0) {
3759         VirtQueue *vq = &vdev->vq[n];
3760         if (!virtio_queue_get_num(vdev, n)) {
3761             continue;
3762         }
3763 
3764         event_notifier_set_handler(&vq->host_notifier, NULL);
3765         r = virtio_bus_set_host_notifier(qbus, n, false);
3766         assert(r >= 0);
3767     }
3768     memory_region_transaction_commit();
3769 
3770     while (--i >= 0) {
3771         if (!virtio_queue_get_num(vdev, i)) {
3772             continue;
3773         }
3774         virtio_bus_cleanup_host_notifier(qbus, i);
3775     }
3776     return err;
3777 }
3778 
3779 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3780 {
3781     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3782     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3783 
3784     return virtio_bus_start_ioeventfd(vbus);
3785 }
3786 
3787 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3788 {
3789     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3790     int n, r;
3791 
3792     memory_region_transaction_begin();
3793     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3794         VirtQueue *vq = &vdev->vq[n];
3795 
3796         if (!virtio_queue_get_num(vdev, n)) {
3797             continue;
3798         }
3799         event_notifier_set_handler(&vq->host_notifier, NULL);
3800         r = virtio_bus_set_host_notifier(qbus, n, false);
3801         assert(r >= 0);
3802     }
3803     memory_region_transaction_commit();
3804 
3805     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3806         if (!virtio_queue_get_num(vdev, n)) {
3807             continue;
3808         }
3809         virtio_bus_cleanup_host_notifier(qbus, n);
3810     }
3811 }
3812 
3813 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3814 {
3815     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3816     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3817 
3818     return virtio_bus_grab_ioeventfd(vbus);
3819 }
3820 
3821 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3822 {
3823     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3824     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3825 
3826     virtio_bus_release_ioeventfd(vbus);
3827 }
3828 
3829 static void virtio_device_class_init(ObjectClass *klass, void *data)
3830 {
3831     /* Set the default value here. */
3832     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3833     DeviceClass *dc = DEVICE_CLASS(klass);
3834 
3835     dc->realize = virtio_device_realize;
3836     dc->unrealize = virtio_device_unrealize;
3837     dc->bus_type = TYPE_VIRTIO_BUS;
3838     device_class_set_props(dc, virtio_properties);
3839     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3840     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3841 
3842     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3843 }
3844 
3845 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3846 {
3847     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3848     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3849 
3850     return virtio_bus_ioeventfd_enabled(vbus);
3851 }
3852 
3853 static const TypeInfo virtio_device_info = {
3854     .name = TYPE_VIRTIO_DEVICE,
3855     .parent = TYPE_DEVICE,
3856     .instance_size = sizeof(VirtIODevice),
3857     .class_init = virtio_device_class_init,
3858     .instance_finalize = virtio_device_instance_finalize,
3859     .abstract = true,
3860     .class_size = sizeof(VirtioDeviceClass),
3861 };
3862 
3863 static void virtio_register_types(void)
3864 {
3865     type_register_static(&virtio_device_info);
3866 }
3867 
3868 type_init(virtio_register_types)
3869