xref: /openbmc/qemu/hw/virtio/virtio.c (revision d7478d42)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
/*
 * Default alignment, in bytes, between the consumer and producer parts of
 * a vring.  The value equals the x86 page size.  It is the default used by
 * transports like PCI, which don't provide a means for the guest to tell
 * the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
38 
/*
 * One entry of the split-ring descriptor table.  vring_split_desc_read()
 * fills it from ring memory and byte-swaps every field.
 */
typedef struct VRingDesc
{
    uint64_t addr;  /* buffer address; for INDIRECT, address of the table */
    uint32_t len;   /* length in bytes; for INDIRECT, size of the table */
    uint16_t flags; /* VRING_DESC_F_* bits (NEXT, WRITE, INDIRECT) */
    uint16_t next;  /* index of the next descriptor when F_NEXT is set */
} VRingDesc;
46 
/*
 * One entry of the packed-ring descriptor area.  The device reads all four
 * fields (vring_packed_desc_read()) and writes back id, len and flags when
 * marking an element used (vring_packed_desc_write()).
 */
typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags; /* carries the VRING_PACKED_DESC_F_AVAIL/USED bits */
} VRingPackedDesc;
53 
/*
 * Layout of the split-ring available area.  Only used for offsetof()
 * computations; the fields are accessed through the avail region cache.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;    /* read by vring_avail_idx() */
    uint16_t ring[]; /* head indices; slot [vring.num] is the used_event */
} VRingAvail;
60 
/* One entry of the split-ring used area, written by vring_used_write(). */
typedef struct VRingUsedElem
{
    uint32_t id;  /* index of the completed element (elem->index) */
    uint32_t len; /* length reported for the element */
} VRingUsedElem;
66 
/*
 * Layout of the split-ring used area.  Only used for offsetof()
 * computations; the fields are accessed through the used region cache.
 */
typedef struct VRingUsed
{
    uint16_t flags;       /* e.g. VRING_USED_F_NO_NOTIFY */
    uint16_t idx;         /* written by vring_used_idx_set() */
    VRingUsedElem ring[]; /* slot [vring.num] holds the avail event */
} VRingUsed;
73 
/*
 * The three mapped ring areas of one virtqueue.  Instances are published
 * through VRing.caches and released via call_rcu() using the embedded
 * rcu head (see virtio_free_region_cache()).
 */
typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;
80 
/* Location and geometry of one ring, plus its RCU-published mappings. */
typedef struct VRing
{
    unsigned int num;         /* number of ring entries */
    unsigned int num_default;
    unsigned int align;       /* alignment applied to the used area */
    hwaddr desc;              /* address of the descriptor area; 0 = unset */
    hwaddr avail;             /* address of the avail area */
    hwaddr used;              /* address of the used area */
    VRingMemoryRegionCaches *caches; /* read with qatomic_rcu_read() */
} VRing;
91 
/*
 * Packed-ring event suppression structure.  off_wrap is written as a ring
 * index with the wrap counter in bit 15 (see
 * virtio_queue_packed_set_notification()); flags takes one of the
 * VRING_PACKED_EVENT_FLAG_* values.
 */
typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent ;
96 
/*
 * Device-side state of one virtqueue: where the ring lives plus the
 * bookkeeping indices used while popping, filling and flushing elements.
 */
struct VirtQueue
{
    VRing vring;
    /* Elements staged by virtqueue_packed_fill() until they are flushed */
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    /* Toggled whenever last_avail_idx wraps on the packed ring */
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    /* Device's used index, and its packed-ring wrap counter */
    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used currently holds a valid value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Count of popped entries that were not yet flushed, detached or rewound */
    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleAIOOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};
135 
136 /* Called within call_rcu().  */
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139     assert(caches != NULL);
140     address_space_cache_destroy(&caches->desc);
141     address_space_cache_destroy(&caches->avail);
142     address_space_cache_destroy(&caches->used);
143     g_free(caches);
144 }
145 
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148     VRingMemoryRegionCaches *caches;
149 
150     caches = qatomic_read(&vq->vring.caches);
151     qatomic_rcu_set(&vq->vring.caches, NULL);
152     if (caches) {
153         call_rcu(caches, virtio_free_region_cache, rcu);
154     }
155 }
156 
/*
 * (Re)build the desc/used/avail region caches of queue @n of @vdev.
 *
 * On success the new set is published with qatomic_rcu_set() and the
 * previous set is handed to call_rcu().  If the descriptor address is 0, or
 * any area cannot be mapped in full, the queue is left without caches
 * (virtio_virtqueue_reset_region_cache()); mapping failures are additionally
 * reported through virtio_error().
 */
static void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;


    addr = vq->vring.desc;
    if (!addr) {
        /* Ring not configured: just drop whatever caches exist. */
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    /*
     * NOTE(review): the last argument of address_space_cache_init() is
     * presumably the "is_write" flag -- the descriptor area is written by
     * the device only in packed mode (see vring_packed_desc_write()).
     */
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                   true : false;
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    /* Publish the new set before retiring the old one. */
    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

    /*
     * Unwind ladder: each label destroys the cache whose initialisation was
     * attempted last and falls through to the ones set up before it.
     */
err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}
214 
215 /* virt queue functions */
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218     VRing *vring = &vdev->vq[n].vring;
219 
220     if (!vring->num || !vring->desc || !vring->align) {
221         /* not yet setup -> nothing to do */
222         return;
223     }
224     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225     vring->used = vring_align(vring->avail +
226                               offsetof(VRingAvail, ring[vring->num]),
227                               vring->align);
228     virtio_init_region_cache(vdev, n);
229 }
230 
231 /* Called within rcu_read_lock().  */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233                                   MemoryRegionCache *cache, int i)
234 {
235     address_space_read_cached(cache, i * sizeof(VRingDesc),
236                               desc, sizeof(VRingDesc));
237     virtio_tswap64s(vdev, &desc->addr);
238     virtio_tswap32s(vdev, &desc->len);
239     virtio_tswap16s(vdev, &desc->flags);
240     virtio_tswap16s(vdev, &desc->next);
241 }
242 
243 static void vring_packed_event_read(VirtIODevice *vdev,
244                                     MemoryRegionCache *cache,
245                                     VRingPackedDescEvent *e)
246 {
247     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
248     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
249 
250     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
251     /* Make sure flags is seen before off_wrap */
252     smp_rmb();
253     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
254     virtio_tswap16s(vdev, &e->flags);
255 }
256 
257 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
258                                         MemoryRegionCache *cache,
259                                         uint16_t off_wrap)
260 {
261     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
262 
263     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
264     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
265 }
266 
267 static void vring_packed_flags_write(VirtIODevice *vdev,
268                                      MemoryRegionCache *cache, uint16_t flags)
269 {
270     hwaddr off = offsetof(VRingPackedDescEvent, flags);
271 
272     virtio_stw_phys_cached(vdev, cache, off, flags);
273     address_space_cache_invalidate(cache, off, sizeof(flags));
274 }
275 
276 /* Called within rcu_read_lock().  */
277 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
278 {
279     return qatomic_rcu_read(&vq->vring.caches);
280 }
281 
282 /* Called within rcu_read_lock().  */
283 static inline uint16_t vring_avail_flags(VirtQueue *vq)
284 {
285     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
286     hwaddr pa = offsetof(VRingAvail, flags);
287 
288     if (!caches) {
289         return 0;
290     }
291 
292     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
293 }
294 
295 /* Called within rcu_read_lock().  */
296 static inline uint16_t vring_avail_idx(VirtQueue *vq)
297 {
298     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
299     hwaddr pa = offsetof(VRingAvail, idx);
300 
301     if (!caches) {
302         return 0;
303     }
304 
305     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
306     return vq->shadow_avail_idx;
307 }
308 
309 /* Called within rcu_read_lock().  */
310 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
311 {
312     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
313     hwaddr pa = offsetof(VRingAvail, ring[i]);
314 
315     if (!caches) {
316         return 0;
317     }
318 
319     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
320 }
321 
322 /* Called within rcu_read_lock().  */
323 static inline uint16_t vring_get_used_event(VirtQueue *vq)
324 {
325     return vring_avail_ring(vq, vq->vring.num);
326 }
327 
328 /* Called within rcu_read_lock().  */
329 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
330                                     int i)
331 {
332     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
333     hwaddr pa = offsetof(VRingUsed, ring[i]);
334 
335     if (!caches) {
336         return;
337     }
338 
339     virtio_tswap32s(vq->vdev, &uelem->id);
340     virtio_tswap32s(vq->vdev, &uelem->len);
341     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
342     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
343 }
344 
345 /* Called within rcu_read_lock().  */
346 static uint16_t vring_used_idx(VirtQueue *vq)
347 {
348     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
349     hwaddr pa = offsetof(VRingUsed, idx);
350 
351     if (!caches) {
352         return 0;
353     }
354 
355     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
356 }
357 
358 /* Called within rcu_read_lock().  */
359 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
360 {
361     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
362     hwaddr pa = offsetof(VRingUsed, idx);
363 
364     if (caches) {
365         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
366         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
367     }
368 
369     vq->used_idx = val;
370 }
371 
372 /* Called within rcu_read_lock().  */
373 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
374 {
375     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
376     VirtIODevice *vdev = vq->vdev;
377     hwaddr pa = offsetof(VRingUsed, flags);
378     uint16_t flags;
379 
380     if (!caches) {
381         return;
382     }
383 
384     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
385     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
386     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
387 }
388 
389 /* Called within rcu_read_lock().  */
390 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
391 {
392     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393     VirtIODevice *vdev = vq->vdev;
394     hwaddr pa = offsetof(VRingUsed, flags);
395     uint16_t flags;
396 
397     if (!caches) {
398         return;
399     }
400 
401     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
402     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
403     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
404 }
405 
406 /* Called within rcu_read_lock().  */
407 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
408 {
409     VRingMemoryRegionCaches *caches;
410     hwaddr pa;
411     if (!vq->notification) {
412         return;
413     }
414 
415     caches = vring_get_region_caches(vq);
416     if (!caches) {
417         return;
418     }
419 
420     pa = offsetof(VRingUsed, ring[vq->vring.num]);
421     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
422     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
423 }
424 
425 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
426 {
427     RCU_READ_LOCK_GUARD();
428 
429     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
430         vring_set_avail_event(vq, vring_avail_idx(vq));
431     } else if (enable) {
432         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
433     } else {
434         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
435     }
436     if (enable) {
437         /* Expose avail event/used flags before caller checks the avail idx. */
438         smp_mb();
439     }
440 }
441 
/*
 * Apply the notification setting @enable to a packed ring by updating the
 * event suppression structure reached through the "used" region cache.
 *
 *  - disabled:            flags = VRING_PACKED_EVENT_FLAG_DISABLE
 *  - enabled + EVENT_IDX: off_wrap = shadow avail index with the shadow wrap
 *                         counter in bit 15, then
 *                         flags = VRING_PACKED_EVENT_FLAG_DESC
 *  - enabled otherwise:   flags = VRING_PACKED_EVENT_FLAG_ENABLE
 *
 * Does nothing when no caches are mapped.
 */
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    /*
     * NOTE(review): every branch below overwrites e.flags and e.off_wrap is
     * never consulted, so the value read here does not influence the result.
     */
    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
474 
475 bool virtio_queue_get_notification(VirtQueue *vq)
476 {
477     return vq->notification;
478 }
479 
480 void virtio_queue_set_notification(VirtQueue *vq, int enable)
481 {
482     vq->notification = enable;
483 
484     if (!vq->vring.desc) {
485         return;
486     }
487 
488     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
489         virtio_queue_packed_set_notification(vq, enable);
490     } else {
491         virtio_queue_split_set_notification(vq, enable);
492     }
493 }
494 
495 int virtio_queue_ready(VirtQueue *vq)
496 {
497     return vq->vring.avail != 0;
498 }
499 
500 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
501                                          uint16_t *flags,
502                                          MemoryRegionCache *cache,
503                                          int i)
504 {
505     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
506 
507     *flags = virtio_lduw_phys_cached(vdev, cache, off);
508 }
509 
510 static void vring_packed_desc_read(VirtIODevice *vdev,
511                                    VRingPackedDesc *desc,
512                                    MemoryRegionCache *cache,
513                                    int i, bool strict_order)
514 {
515     hwaddr off = i * sizeof(VRingPackedDesc);
516 
517     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
518 
519     if (strict_order) {
520         /* Make sure flags is read before the rest fields. */
521         smp_rmb();
522     }
523 
524     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
525                               &desc->addr, sizeof(desc->addr));
526     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
527                               &desc->id, sizeof(desc->id));
528     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
529                               &desc->len, sizeof(desc->len));
530     virtio_tswap64s(vdev, &desc->addr);
531     virtio_tswap16s(vdev, &desc->id);
532     virtio_tswap32s(vdev, &desc->len);
533 }
534 
535 static void vring_packed_desc_write_data(VirtIODevice *vdev,
536                                          VRingPackedDesc *desc,
537                                          MemoryRegionCache *cache,
538                                          int i)
539 {
540     hwaddr off_id = i * sizeof(VRingPackedDesc) +
541                     offsetof(VRingPackedDesc, id);
542     hwaddr off_len = i * sizeof(VRingPackedDesc) +
543                     offsetof(VRingPackedDesc, len);
544 
545     virtio_tswap32s(vdev, &desc->len);
546     virtio_tswap16s(vdev, &desc->id);
547     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
548     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
549     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
550     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
551 }
552 
553 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
554                                           VRingPackedDesc *desc,
555                                           MemoryRegionCache *cache,
556                                           int i)
557 {
558     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
559 
560     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
561     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
562 }
563 
564 static void vring_packed_desc_write(VirtIODevice *vdev,
565                                     VRingPackedDesc *desc,
566                                     MemoryRegionCache *cache,
567                                     int i, bool strict_order)
568 {
569     vring_packed_desc_write_data(vdev, desc, cache, i);
570     if (strict_order) {
571         /* Make sure data is wrote before flags. */
572         smp_wmb();
573     }
574     vring_packed_desc_write_flags(vdev, desc, cache, i);
575 }
576 
577 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
578 {
579     bool avail, used;
580 
581     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
582     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
583     return (avail != used) && (avail == wrap_counter);
584 }
585 
586 /* Fetch avail_idx from VQ memory only when we really need to know if
587  * guest has added some buffers.
588  * Called within rcu_read_lock().  */
589 static int virtio_queue_empty_rcu(VirtQueue *vq)
590 {
591     if (virtio_device_disabled(vq->vdev)) {
592         return 1;
593     }
594 
595     if (unlikely(!vq->vring.avail)) {
596         return 1;
597     }
598 
599     if (vq->shadow_avail_idx != vq->last_avail_idx) {
600         return 0;
601     }
602 
603     return vring_avail_idx(vq) == vq->last_avail_idx;
604 }
605 
606 static int virtio_queue_split_empty(VirtQueue *vq)
607 {
608     bool empty;
609 
610     if (virtio_device_disabled(vq->vdev)) {
611         return 1;
612     }
613 
614     if (unlikely(!vq->vring.avail)) {
615         return 1;
616     }
617 
618     if (vq->shadow_avail_idx != vq->last_avail_idx) {
619         return 0;
620     }
621 
622     RCU_READ_LOCK_GUARD();
623     empty = vring_avail_idx(vq) == vq->last_avail_idx;
624     return empty;
625 }
626 
627 /* Called within rcu_read_lock().  */
628 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
629 {
630     struct VRingPackedDesc desc;
631     VRingMemoryRegionCaches *cache;
632 
633     if (unlikely(!vq->vring.desc)) {
634         return 1;
635     }
636 
637     cache = vring_get_region_caches(vq);
638     if (!cache) {
639         return 1;
640     }
641 
642     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
643                                  vq->last_avail_idx);
644 
645     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
646 }
647 
648 static int virtio_queue_packed_empty(VirtQueue *vq)
649 {
650     RCU_READ_LOCK_GUARD();
651     return virtio_queue_packed_empty_rcu(vq);
652 }
653 
654 int virtio_queue_empty(VirtQueue *vq)
655 {
656     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
657         return virtio_queue_packed_empty(vq);
658     } else {
659         return virtio_queue_split_empty(vq);
660     }
661 }
662 
663 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
664                                unsigned int len)
665 {
666     AddressSpace *dma_as = vq->vdev->dma_as;
667     unsigned int offset;
668     int i;
669 
670     offset = 0;
671     for (i = 0; i < elem->in_num; i++) {
672         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
673 
674         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
675                          elem->in_sg[i].iov_len,
676                          DMA_DIRECTION_FROM_DEVICE, size);
677 
678         offset += size;
679     }
680 
681     for (i = 0; i < elem->out_num; i++)
682         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
683                          elem->out_sg[i].iov_len,
684                          DMA_DIRECTION_TO_DEVICE,
685                          elem->out_sg[i].iov_len);
686 }
687 
688 /* virtqueue_detach_element:
689  * @vq: The #VirtQueue
690  * @elem: The #VirtQueueElement
691  * @len: number of bytes written
692  *
693  * Detach the element from the virtqueue.  This function is suitable for device
694  * reset or other situations where a #VirtQueueElement is simply freed and will
695  * not be pushed or discarded.
696  */
697 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
698                               unsigned int len)
699 {
700     vq->inuse -= elem->ndescs;
701     virtqueue_unmap_sg(vq, elem, len);
702 }
703 
704 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
705 {
706     vq->last_avail_idx -= num;
707 }
708 
709 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
710 {
711     if (vq->last_avail_idx < num) {
712         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
713         vq->last_avail_wrap_counter ^= 1;
714     } else {
715         vq->last_avail_idx -= num;
716     }
717 }
718 
719 /* virtqueue_unpop:
720  * @vq: The #VirtQueue
721  * @elem: The #VirtQueueElement
722  * @len: number of bytes written
723  *
724  * Pretend the most recent element wasn't popped from the virtqueue.  The next
725  * call to virtqueue_pop() will refetch the element.
726  */
727 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
728                      unsigned int len)
729 {
730 
731     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
732         virtqueue_packed_rewind(vq, 1);
733     } else {
734         virtqueue_split_rewind(vq, 1);
735     }
736 
737     virtqueue_detach_element(vq, elem, len);
738 }
739 
740 /* virtqueue_rewind:
741  * @vq: The #VirtQueue
742  * @num: Number of elements to push back
743  *
744  * Pretend that elements weren't popped from the virtqueue.  The next
745  * virtqueue_pop() will refetch the oldest element.
746  *
747  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
748  *
749  * Returns: true on success, false if @num is greater than the number of in use
750  * elements.
751  */
752 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
753 {
754     if (num > vq->inuse) {
755         return false;
756     }
757 
758     vq->inuse -= num;
759     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
760         virtqueue_packed_rewind(vq, num);
761     } else {
762         virtqueue_split_rewind(vq, num);
763     }
764     return true;
765 }
766 
767 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
768                     unsigned int len, unsigned int idx)
769 {
770     VRingUsedElem uelem;
771 
772     if (unlikely(!vq->vring.used)) {
773         return;
774     }
775 
776     idx = (idx + vq->used_idx) % vq->vring.num;
777 
778     uelem.id = elem->index;
779     uelem.len = len;
780     vring_used_write(vq, &uelem, idx);
781 }
782 
783 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
784                                   unsigned int len, unsigned int idx)
785 {
786     vq->used_elems[idx].index = elem->index;
787     vq->used_elems[idx].len = len;
788     vq->used_elems[idx].ndescs = elem->ndescs;
789 }
790 
791 static void virtqueue_packed_fill_desc(VirtQueue *vq,
792                                        const VirtQueueElement *elem,
793                                        unsigned int idx,
794                                        bool strict_order)
795 {
796     uint16_t head;
797     VRingMemoryRegionCaches *caches;
798     VRingPackedDesc desc = {
799         .id = elem->index,
800         .len = elem->len,
801     };
802     bool wrap_counter = vq->used_wrap_counter;
803 
804     if (unlikely(!vq->vring.desc)) {
805         return;
806     }
807 
808     head = vq->used_idx + idx;
809     if (head >= vq->vring.num) {
810         head -= vq->vring.num;
811         wrap_counter ^= 1;
812     }
813     if (wrap_counter) {
814         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
815         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
816     } else {
817         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
818         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
819     }
820 
821     caches = vring_get_region_caches(vq);
822     if (!caches) {
823         return;
824     }
825 
826     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
827 }
828 
829 /* Called within rcu_read_lock().  */
830 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
831                     unsigned int len, unsigned int idx)
832 {
833     trace_virtqueue_fill(vq, elem, len, idx);
834 
835     virtqueue_unmap_sg(vq, elem, len);
836 
837     if (virtio_device_disabled(vq->vdev)) {
838         return;
839     }
840 
841     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
842         virtqueue_packed_fill(vq, elem, len, idx);
843     } else {
844         virtqueue_split_fill(vq, elem, len, idx);
845     }
846 }
847 
/*
 * Publish @count previously filled used entries of a split ring by
 * advancing the used index, and reduce vq->inuse by @count.  Skipped when
 * the ring has no used address.
 * Called within rcu_read_lock().
 */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    /*
     * 16-bit wrap-around arithmetic: if the distance from signalled_used to
     * the new index is smaller than the step just taken, signalled_used no
     * longer describes a meaningful position and is marked invalid.
     */
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}
867 
868 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
869 {
870     unsigned int i, ndescs = 0;
871 
872     if (unlikely(!vq->vring.desc)) {
873         return;
874     }
875 
876     for (i = 1; i < count; i++) {
877         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
878         ndescs += vq->used_elems[i].ndescs;
879     }
880     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
881     ndescs += vq->used_elems[0].ndescs;
882 
883     vq->inuse -= ndescs;
884     vq->used_idx += ndescs;
885     if (vq->used_idx >= vq->vring.num) {
886         vq->used_idx -= vq->vring.num;
887         vq->used_wrap_counter ^= 1;
888     }
889 }
890 
891 void virtqueue_flush(VirtQueue *vq, unsigned int count)
892 {
893     if (virtio_device_disabled(vq->vdev)) {
894         vq->inuse -= count;
895         return;
896     }
897 
898     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
899         virtqueue_packed_flush(vq, count);
900     } else {
901         virtqueue_split_flush(vq, count);
902     }
903 }
904 
905 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
906                     unsigned int len)
907 {
908     RCU_READ_LOCK_GUARD();
909     virtqueue_fill(vq, elem, len, 0);
910     virtqueue_flush(vq, 1);
911 }
912 
/*
 * Return how many heads the guest has made available beyond @idx, computed
 * as the freshly read avail index minus @idx in 16-bit arithmetic.
 *
 * Returns -EINVAL, after reporting through virtio_error(), when that count
 * exceeds the ring size.
 * Called within rcu_read_lock().
 */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        /*
         * NOTE(review): the text says "used index", but the values printed
         * are @idx and the shadow *avail* index -- confirm the wording.
         */
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}
932 
933 /* Called within rcu_read_lock().  */
934 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
935                                unsigned int *head)
936 {
937     /* Grab the next descriptor number they're advertising, and increment
938      * the index we've seen. */
939     *head = vring_avail_ring(vq, idx % vq->vring.num);
940 
941     /* If their number is silly, that's a fatal mistake. */
942     if (*head >= vq->vring.num) {
943         virtio_error(vq->vdev, "Guest says index %u is available", *head);
944         return false;
945     }
946 
947     return true;
948 }
949 
/* Result codes returned by virtqueue_split_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1, /* next index out of range */
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
955 
/*
 * Advance along a split-ring descriptor chain.
 *
 * @desc holds the current descriptor on entry.  If it has VRING_DESC_F_NEXT
 * set, its 'next' index is stored in *@next and, provided that index is
 * below @max, the descriptor at that index is read from @desc_cache into
 * @desc.
 *
 * Returns VIRTQUEUE_READ_DESC_DONE when the chain ends (*@next and @desc are
 * left untouched), VIRTQUEUE_READ_DESC_MORE when @desc now holds the next
 * descriptor, or VIRTQUEUE_READ_DESC_ERROR (after virtio_error()) when the
 * next index is out of range.
 */
static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /*
     * Make sure compiler knows to grab that: we don't want it changing!
     * NOTE(review): a write barrier is used here although the preceding
     * access is a load from a local copy -- confirm this is intended.
     */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}
978 
979 /* Called within rcu_read_lock().  */
980 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
981                             unsigned int *in_bytes, unsigned int *out_bytes,
982                             unsigned max_in_bytes, unsigned max_out_bytes,
983                             VRingMemoryRegionCaches *caches)
984 {
985     VirtIODevice *vdev = vq->vdev;
986     unsigned int max, idx;
987     unsigned int total_bufs, in_total, out_total;
988     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
989     int64_t len = 0;
990     int rc;
991 
992     idx = vq->last_avail_idx;
993     total_bufs = in_total = out_total = 0;
994 
995     max = vq->vring.num;
996 
997     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
998         MemoryRegionCache *desc_cache = &caches->desc;
999         unsigned int num_bufs;
1000         VRingDesc desc;
1001         unsigned int i;
1002 
1003         num_bufs = total_bufs;
1004 
1005         if (!virtqueue_get_head(vq, idx++, &i)) {
1006             goto err;
1007         }
1008 
1009         vring_split_desc_read(vdev, &desc, desc_cache, i);
1010 
1011         if (desc.flags & VRING_DESC_F_INDIRECT) {
1012             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1013                 virtio_error(vdev, "Invalid size for indirect buffer table");
1014                 goto err;
1015             }
1016 
1017             /* If we've got too many, that implies a descriptor loop. */
1018             if (num_bufs >= max) {
1019                 virtio_error(vdev, "Looped descriptor");
1020                 goto err;
1021             }
1022 
1023             /* loop over the indirect descriptor table */
1024             len = address_space_cache_init(&indirect_desc_cache,
1025                                            vdev->dma_as,
1026                                            desc.addr, desc.len, false);
1027             desc_cache = &indirect_desc_cache;
1028             if (len < desc.len) {
1029                 virtio_error(vdev, "Cannot map indirect buffer");
1030                 goto err;
1031             }
1032 
1033             max = desc.len / sizeof(VRingDesc);
1034             num_bufs = i = 0;
1035             vring_split_desc_read(vdev, &desc, desc_cache, i);
1036         }
1037 
1038         do {
1039             /* If we've got too many, that implies a descriptor loop. */
1040             if (++num_bufs > max) {
1041                 virtio_error(vdev, "Looped descriptor");
1042                 goto err;
1043             }
1044 
1045             if (desc.flags & VRING_DESC_F_WRITE) {
1046                 in_total += desc.len;
1047             } else {
1048                 out_total += desc.len;
1049             }
1050             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1051                 goto done;
1052             }
1053 
1054             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1055         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1056 
1057         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1058             goto err;
1059         }
1060 
1061         if (desc_cache == &indirect_desc_cache) {
1062             address_space_cache_destroy(&indirect_desc_cache);
1063             total_bufs++;
1064         } else {
1065             total_bufs = num_bufs;
1066         }
1067     }
1068 
1069     if (rc < 0) {
1070         goto err;
1071     }
1072 
1073 done:
1074     address_space_cache_destroy(&indirect_desc_cache);
1075     if (in_bytes) {
1076         *in_bytes = in_total;
1077     }
1078     if (out_bytes) {
1079         *out_bytes = out_total;
1080     }
1081     return;
1082 
1083 err:
1084     in_total = out_total = 0;
1085     goto done;
1086 }
1087 
1088 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1089                                            VRingPackedDesc *desc,
1090                                            MemoryRegionCache
1091                                            *desc_cache,
1092                                            unsigned int max,
1093                                            unsigned int *next,
1094                                            bool indirect)
1095 {
1096     /* If this descriptor says it doesn't chain, we're done. */
1097     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1098         return VIRTQUEUE_READ_DESC_DONE;
1099     }
1100 
1101     ++*next;
1102     if (*next == max) {
1103         if (indirect) {
1104             return VIRTQUEUE_READ_DESC_DONE;
1105         } else {
1106             (*next) -= vq->vring.num;
1107         }
1108     }
1109 
1110     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1111     return VIRTQUEUE_READ_DESC_MORE;
1112 }
1113 
1114 /* Called within rcu_read_lock().  */
1115 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1116                                              unsigned int *in_bytes,
1117                                              unsigned int *out_bytes,
1118                                              unsigned max_in_bytes,
1119                                              unsigned max_out_bytes,
1120                                              VRingMemoryRegionCaches *caches)
1121 {
1122     VirtIODevice *vdev = vq->vdev;
1123     unsigned int max, idx;
1124     unsigned int total_bufs, in_total, out_total;
1125     MemoryRegionCache *desc_cache;
1126     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1127     int64_t len = 0;
1128     VRingPackedDesc desc;
1129     bool wrap_counter;
1130 
1131     idx = vq->last_avail_idx;
1132     wrap_counter = vq->last_avail_wrap_counter;
1133     total_bufs = in_total = out_total = 0;
1134 
1135     max = vq->vring.num;
1136 
1137     for (;;) {
1138         unsigned int num_bufs = total_bufs;
1139         unsigned int i = idx;
1140         int rc;
1141 
1142         desc_cache = &caches->desc;
1143         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1144         if (!is_desc_avail(desc.flags, wrap_counter)) {
1145             break;
1146         }
1147 
1148         if (desc.flags & VRING_DESC_F_INDIRECT) {
1149             if (desc.len % sizeof(VRingPackedDesc)) {
1150                 virtio_error(vdev, "Invalid size for indirect buffer table");
1151                 goto err;
1152             }
1153 
1154             /* If we've got too many, that implies a descriptor loop. */
1155             if (num_bufs >= max) {
1156                 virtio_error(vdev, "Looped descriptor");
1157                 goto err;
1158             }
1159 
1160             /* loop over the indirect descriptor table */
1161             len = address_space_cache_init(&indirect_desc_cache,
1162                                            vdev->dma_as,
1163                                            desc.addr, desc.len, false);
1164             desc_cache = &indirect_desc_cache;
1165             if (len < desc.len) {
1166                 virtio_error(vdev, "Cannot map indirect buffer");
1167                 goto err;
1168             }
1169 
1170             max = desc.len / sizeof(VRingPackedDesc);
1171             num_bufs = i = 0;
1172             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1173         }
1174 
1175         do {
1176             /* If we've got too many, that implies a descriptor loop. */
1177             if (++num_bufs > max) {
1178                 virtio_error(vdev, "Looped descriptor");
1179                 goto err;
1180             }
1181 
1182             if (desc.flags & VRING_DESC_F_WRITE) {
1183                 in_total += desc.len;
1184             } else {
1185                 out_total += desc.len;
1186             }
1187             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1188                 goto done;
1189             }
1190 
1191             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1192                                                  &i, desc_cache ==
1193                                                  &indirect_desc_cache);
1194         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1195 
1196         if (desc_cache == &indirect_desc_cache) {
1197             address_space_cache_destroy(&indirect_desc_cache);
1198             total_bufs++;
1199             idx++;
1200         } else {
1201             idx += num_bufs - total_bufs;
1202             total_bufs = num_bufs;
1203         }
1204 
1205         if (idx >= vq->vring.num) {
1206             idx -= vq->vring.num;
1207             wrap_counter ^= 1;
1208         }
1209     }
1210 
1211     /* Record the index and wrap counter for a kick we want */
1212     vq->shadow_avail_idx = idx;
1213     vq->shadow_avail_wrap_counter = wrap_counter;
1214 done:
1215     address_space_cache_destroy(&indirect_desc_cache);
1216     if (in_bytes) {
1217         *in_bytes = in_total;
1218     }
1219     if (out_bytes) {
1220         *out_bytes = out_total;
1221     }
1222     return;
1223 
1224 err:
1225     in_total = out_total = 0;
1226     goto done;
1227 }
1228 
1229 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1230                                unsigned int *out_bytes,
1231                                unsigned max_in_bytes, unsigned max_out_bytes)
1232 {
1233     uint16_t desc_size;
1234     VRingMemoryRegionCaches *caches;
1235 
1236     RCU_READ_LOCK_GUARD();
1237 
1238     if (unlikely(!vq->vring.desc)) {
1239         goto err;
1240     }
1241 
1242     caches = vring_get_region_caches(vq);
1243     if (!caches) {
1244         goto err;
1245     }
1246 
1247     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1248                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1249     if (caches->desc.len < vq->vring.num * desc_size) {
1250         virtio_error(vq->vdev, "Cannot map descriptor ring");
1251         goto err;
1252     }
1253 
1254     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1255         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1256                                          max_in_bytes, max_out_bytes,
1257                                          caches);
1258     } else {
1259         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1260                                         max_in_bytes, max_out_bytes,
1261                                         caches);
1262     }
1263 
1264     return;
1265 err:
1266     if (in_bytes) {
1267         *in_bytes = 0;
1268     }
1269     if (out_bytes) {
1270         *out_bytes = 0;
1271     }
1272 }
1273 
1274 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1275                           unsigned int out_bytes)
1276 {
1277     unsigned int in_total, out_total;
1278 
1279     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1280     return in_bytes <= in_total && out_bytes <= out_total;
1281 }
1282 
1283 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1284                                hwaddr *addr, struct iovec *iov,
1285                                unsigned int max_num_sg, bool is_write,
1286                                hwaddr pa, size_t sz)
1287 {
1288     bool ok = false;
1289     unsigned num_sg = *p_num_sg;
1290     assert(num_sg <= max_num_sg);
1291 
1292     if (!sz) {
1293         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1294         goto out;
1295     }
1296 
1297     while (sz) {
1298         hwaddr len = sz;
1299 
1300         if (num_sg == max_num_sg) {
1301             virtio_error(vdev, "virtio: too many write descriptors in "
1302                                "indirect table");
1303             goto out;
1304         }
1305 
1306         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1307                                               is_write ?
1308                                               DMA_DIRECTION_FROM_DEVICE :
1309                                               DMA_DIRECTION_TO_DEVICE,
1310                                               MEMTXATTRS_UNSPECIFIED);
1311         if (!iov[num_sg].iov_base) {
1312             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1313             goto out;
1314         }
1315 
1316         iov[num_sg].iov_len = len;
1317         addr[num_sg] = pa;
1318 
1319         sz -= len;
1320         pa += len;
1321         num_sg++;
1322     }
1323     ok = true;
1324 
1325 out:
1326     *p_num_sg = num_sg;
1327     return ok;
1328 }
1329 
/*
 * Unmap the first @out_num + @in_num entries of @iov.
 *
 * Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet, which is why an access length of 0 is passed to the unmap call.
 *
 * The first @out_num entries are unmapped as reads, the rest as writes.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    const unsigned int total = out_num + in_num;
    unsigned int n;

    for (n = 0; n < total; n++) {
        int is_write = n >= out_num;

        cpu_physical_memory_unmap(iov[n].iov_base, iov[n].iov_len,
                                  is_write, 0);
    }
}
1346 
1347 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1348                                 hwaddr *addr, unsigned int num_sg,
1349                                 bool is_write)
1350 {
1351     unsigned int i;
1352     hwaddr len;
1353 
1354     for (i = 0; i < num_sg; i++) {
1355         len = sg[i].iov_len;
1356         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1357                                         addr[i], &len, is_write ?
1358                                         DMA_DIRECTION_FROM_DEVICE :
1359                                         DMA_DIRECTION_TO_DEVICE,
1360                                         MEMTXATTRS_UNSPECIFIED);
1361         if (!sg[i].iov_base) {
1362             error_report("virtio: error trying to map MMIO memory");
1363             exit(1);
1364         }
1365         if (len != sg[i].iov_len) {
1366             error_report("virtio: unexpected memory split");
1367             exit(1);
1368         }
1369     }
1370 }
1371 
1372 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1373 {
1374     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1375     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1376                                                                         false);
1377 }
1378 
1379 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1380 {
1381     VirtQueueElement *elem;
1382     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1383     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1384     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1385     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1386     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1387     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1388 
1389     assert(sz >= sizeof(VirtQueueElement));
1390     elem = g_malloc(out_sg_end);
1391     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1392     elem->out_num = out_num;
1393     elem->in_num = in_num;
1394     elem->in_addr = (void *)elem + in_addr_ofs;
1395     elem->out_addr = (void *)elem + out_addr_ofs;
1396     elem->in_sg = (void *)elem + in_sg_ofs;
1397     elem->out_sg = (void *)elem + out_sg_ofs;
1398     return elem;
1399 }
1400 
/*
 * Pop the next available element from a split virtqueue.
 *
 * Fetches the head index at vq->last_avail_idx, walks the descriptor chain
 * (or the indirect table the head points to), maps every buffer and copies
 * the result into an element allocated with virtqueue_alloc_element(@sz, ...).
 *
 * Within a chain, device-readable ("out") descriptors must precede
 * device-writable ("in") ones.
 *
 * Returns the new element, or NULL when the queue is empty or when any
 * validation or mapping step fails (virtio_error() is raised for failures).
 * On failure, buffers mapped so far are unmapped again.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    /* Scratch arrays: out entries first, in entries right after them. */
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    /*
     * Note: last_avail_idx is advanced here and is not rolled back by the
     * error paths further down.
     */
    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    /* The descriptor cache must cover the whole ring. */
    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        /* The table must be non-empty and hold whole descriptors only. */
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* From here on the walk is bounded by the indirect table. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* "in" segments are appended after all "out" segments. */
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* An "out" descriptor after an "in" one is a protocol error. */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    /* A split-ring element is always accounted as a single descriptor. */
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Shared exit: release the indirect table cache (success and failure). */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    /* elem is still NULL here, so the caller sees a failed pop. */
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
1537 
1538 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1539 {
1540     unsigned int i, max;
1541     VRingMemoryRegionCaches *caches;
1542     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1543     MemoryRegionCache *desc_cache;
1544     int64_t len;
1545     VirtIODevice *vdev = vq->vdev;
1546     VirtQueueElement *elem = NULL;
1547     unsigned out_num, in_num, elem_entries;
1548     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1549     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1550     VRingPackedDesc desc;
1551     uint16_t id;
1552     int rc;
1553 
1554     RCU_READ_LOCK_GUARD();
1555     if (virtio_queue_packed_empty_rcu(vq)) {
1556         goto done;
1557     }
1558 
1559     /* When we start there are none of either input nor output. */
1560     out_num = in_num = elem_entries = 0;
1561 
1562     max = vq->vring.num;
1563 
1564     if (vq->inuse >= vq->vring.num) {
1565         virtio_error(vdev, "Virtqueue size exceeded");
1566         goto done;
1567     }
1568 
1569     i = vq->last_avail_idx;
1570 
1571     caches = vring_get_region_caches(vq);
1572     if (!caches) {
1573         virtio_error(vdev, "Region caches not initialized");
1574         goto done;
1575     }
1576 
1577     if (caches->desc.len < max * sizeof(VRingDesc)) {
1578         virtio_error(vdev, "Cannot map descriptor ring");
1579         goto done;
1580     }
1581 
1582     desc_cache = &caches->desc;
1583     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1584     id = desc.id;
1585     if (desc.flags & VRING_DESC_F_INDIRECT) {
1586         if (desc.len % sizeof(VRingPackedDesc)) {
1587             virtio_error(vdev, "Invalid size for indirect buffer table");
1588             goto done;
1589         }
1590 
1591         /* loop over the indirect descriptor table */
1592         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1593                                        desc.addr, desc.len, false);
1594         desc_cache = &indirect_desc_cache;
1595         if (len < desc.len) {
1596             virtio_error(vdev, "Cannot map indirect buffer");
1597             goto done;
1598         }
1599 
1600         max = desc.len / sizeof(VRingPackedDesc);
1601         i = 0;
1602         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1603     }
1604 
1605     /* Collect all the descriptors */
1606     do {
1607         bool map_ok;
1608 
1609         if (desc.flags & VRING_DESC_F_WRITE) {
1610             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1611                                         iov + out_num,
1612                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1613                                         desc.addr, desc.len);
1614         } else {
1615             if (in_num) {
1616                 virtio_error(vdev, "Incorrect order for descriptors");
1617                 goto err_undo_map;
1618             }
1619             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1620                                         VIRTQUEUE_MAX_SIZE, false,
1621                                         desc.addr, desc.len);
1622         }
1623         if (!map_ok) {
1624             goto err_undo_map;
1625         }
1626 
1627         /* If we've got too many, that implies a descriptor loop. */
1628         if (++elem_entries > max) {
1629             virtio_error(vdev, "Looped descriptor");
1630             goto err_undo_map;
1631         }
1632 
1633         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1634                                              desc_cache ==
1635                                              &indirect_desc_cache);
1636     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1637 
1638     /* Now copy what we have collected and mapped */
1639     elem = virtqueue_alloc_element(sz, out_num, in_num);
1640     for (i = 0; i < out_num; i++) {
1641         elem->out_addr[i] = addr[i];
1642         elem->out_sg[i] = iov[i];
1643     }
1644     for (i = 0; i < in_num; i++) {
1645         elem->in_addr[i] = addr[out_num + i];
1646         elem->in_sg[i] = iov[out_num + i];
1647     }
1648 
1649     elem->index = id;
1650     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1651     vq->last_avail_idx += elem->ndescs;
1652     vq->inuse += elem->ndescs;
1653 
1654     if (vq->last_avail_idx >= vq->vring.num) {
1655         vq->last_avail_idx -= vq->vring.num;
1656         vq->last_avail_wrap_counter ^= 1;
1657     }
1658 
1659     vq->shadow_avail_idx = vq->last_avail_idx;
1660     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1661 
1662     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1663 done:
1664     address_space_cache_destroy(&indirect_desc_cache);
1665 
1666     return elem;
1667 
1668 err_undo_map:
1669     virtqueue_undo_map_desc(out_num, in_num, iov);
1670     goto done;
1671 }
1672 
1673 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1674 {
1675     if (virtio_device_disabled(vq->vdev)) {
1676         return NULL;
1677     }
1678 
1679     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1680         return virtqueue_packed_pop(vq, sz);
1681     } else {
1682         return virtqueue_split_pop(vq, sz);
1683     }
1684 }
1685 
1686 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1687 {
1688     VRingMemoryRegionCaches *caches;
1689     MemoryRegionCache *desc_cache;
1690     unsigned int dropped = 0;
1691     VirtQueueElement elem = {};
1692     VirtIODevice *vdev = vq->vdev;
1693     VRingPackedDesc desc;
1694 
1695     RCU_READ_LOCK_GUARD();
1696 
1697     caches = vring_get_region_caches(vq);
1698     if (!caches) {
1699         return 0;
1700     }
1701 
1702     desc_cache = &caches->desc;
1703 
1704     virtio_queue_set_notification(vq, 0);
1705 
1706     while (vq->inuse < vq->vring.num) {
1707         unsigned int idx = vq->last_avail_idx;
1708         /*
1709          * works similar to virtqueue_pop but does not map buffers
1710          * and does not allocate any memory.
1711          */
1712         vring_packed_desc_read(vdev, &desc, desc_cache,
1713                                vq->last_avail_idx , true);
1714         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1715             break;
1716         }
1717         elem.index = desc.id;
1718         elem.ndescs = 1;
1719         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1720                                                vq->vring.num, &idx, false)) {
1721             ++elem.ndescs;
1722         }
1723         /*
1724          * immediately push the element, nothing to unmap
1725          * as both in_num and out_num are set to 0.
1726          */
1727         virtqueue_push(vq, &elem, 0);
1728         dropped++;
1729         vq->last_avail_idx += elem.ndescs;
1730         if (vq->last_avail_idx >= vq->vring.num) {
1731             vq->last_avail_idx -= vq->vring.num;
1732             vq->last_avail_wrap_counter ^= 1;
1733         }
1734     }
1735 
1736     return dropped;
1737 }
1738 
1739 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1740 {
1741     unsigned int dropped = 0;
1742     VirtQueueElement elem = {};
1743     VirtIODevice *vdev = vq->vdev;
1744     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1745 
1746     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1747         /* works similar to virtqueue_pop but does not map buffers
1748         * and does not allocate any memory */
1749         smp_rmb();
1750         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1751             break;
1752         }
1753         vq->inuse++;
1754         vq->last_avail_idx++;
1755         if (fEventIdx) {
1756             vring_set_avail_event(vq, vq->last_avail_idx);
1757         }
1758         /* immediately push the element, nothing to unmap
1759          * as both in_num and out_num are set to 0 */
1760         virtqueue_push(vq, &elem, 0);
1761         dropped++;
1762     }
1763 
1764     return dropped;
1765 }
1766 
1767 /* virtqueue_drop_all:
1768  * @vq: The #VirtQueue
1769  * Drops all queued buffers and indicates them to the guest
1770  * as if they are done. Useful when buffers can not be
1771  * processed but must be returned to the guest.
1772  */
1773 unsigned int virtqueue_drop_all(VirtQueue *vq)
1774 {
1775     struct VirtIODevice *vdev = vq->vdev;
1776 
1777     if (virtio_device_disabled(vq->vdev)) {
1778         return 0;
1779     }
1780 
1781     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1782         return virtqueue_packed_drop_all(vq);
1783     } else {
1784         return virtqueue_split_drop_all(vq);
1785     }
1786 }
1787 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * This struct is copied to and from the migration stream as raw bytes
 * (see qemu_get_virtqueue_element() / qemu_put_virtqueue_element()), so
 * its field order and array sizes are part of the stream format.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;                         /* element index */
    unsigned int out_num;                       /* used out_* entries */
    unsigned int in_num;                        /* used in_* entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    /* Only iov_len is meaningful in the stream; iov_base is saved as 0. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
1804 
1805 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1806 {
1807     VirtQueueElement *elem;
1808     VirtQueueElementOld data;
1809     int i;
1810 
1811     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1812 
1813     /* TODO: teach all callers that this can fail, and return failure instead
1814      * of asserting here.
1815      * This is just one thing (there are probably more) that must be
1816      * fixed before we can allow NDEBUG compilation.
1817      */
1818     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1819     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1820 
1821     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1822     elem->index = data.index;
1823 
1824     for (i = 0; i < elem->in_num; i++) {
1825         elem->in_addr[i] = data.in_addr[i];
1826     }
1827 
1828     for (i = 0; i < elem->out_num; i++) {
1829         elem->out_addr[i] = data.out_addr[i];
1830     }
1831 
1832     for (i = 0; i < elem->in_num; i++) {
1833         /* Base is overwritten by virtqueue_map.  */
1834         elem->in_sg[i].iov_base = 0;
1835         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1836     }
1837 
1838     for (i = 0; i < elem->out_num; i++) {
1839         /* Base is overwritten by virtqueue_map.  */
1840         elem->out_sg[i].iov_base = 0;
1841         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1842     }
1843 
1844     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1845         qemu_get_be32s(f, &elem->ndescs);
1846     }
1847 
1848     virtqueue_map(vdev, elem);
1849     return elem;
1850 }
1851 
1852 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1853                                 VirtQueueElement *elem)
1854 {
1855     VirtQueueElementOld data;
1856     int i;
1857 
1858     memset(&data, 0, sizeof(data));
1859     data.index = elem->index;
1860     data.in_num = elem->in_num;
1861     data.out_num = elem->out_num;
1862 
1863     for (i = 0; i < elem->in_num; i++) {
1864         data.in_addr[i] = elem->in_addr[i];
1865     }
1866 
1867     for (i = 0; i < elem->out_num; i++) {
1868         data.out_addr[i] = elem->out_addr[i];
1869     }
1870 
1871     for (i = 0; i < elem->in_num; i++) {
1872         /* Base is overwritten by virtqueue_map when loading.  Do not
1873          * save it, as it would leak the QEMU address space layout.  */
1874         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1875     }
1876 
1877     for (i = 0; i < elem->out_num; i++) {
1878         /* Do not save iov_base as above.  */
1879         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1880     }
1881 
1882     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1883         qemu_put_be32s(f, &elem->ndescs);
1884     }
1885 
1886     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1887 }
1888 
1889 /* virtio device */
1890 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1891 {
1892     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1893     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1894 
1895     if (virtio_device_disabled(vdev)) {
1896         return;
1897     }
1898 
1899     if (k->notify) {
1900         k->notify(qbus->parent, vector);
1901     }
1902 }
1903 
1904 void virtio_update_irq(VirtIODevice *vdev)
1905 {
1906     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1907 }
1908 
1909 static int virtio_validate_features(VirtIODevice *vdev)
1910 {
1911     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1912 
1913     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1914         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1915         return -EFAULT;
1916     }
1917 
1918     if (k->validate_features) {
1919         return k->validate_features(vdev);
1920     } else {
1921         return 0;
1922     }
1923 }
1924 
1925 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1926 {
1927     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1928     trace_virtio_set_status(vdev, val);
1929 
1930     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1931         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1932             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1933             int ret = virtio_validate_features(vdev);
1934 
1935             if (ret) {
1936                 return ret;
1937             }
1938         }
1939     }
1940 
1941     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1942         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1943         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1944     }
1945 
1946     if (k->set_status) {
1947         k->set_status(vdev, val);
1948     }
1949     vdev->status = val;
1950 
1951     return 0;
1952 }
1953 
1954 static enum virtio_device_endian virtio_default_endian(void)
1955 {
1956     if (target_words_bigendian()) {
1957         return VIRTIO_DEVICE_ENDIAN_BIG;
1958     } else {
1959         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1960     }
1961 }
1962 
1963 static enum virtio_device_endian virtio_current_cpu_endian(void)
1964 {
1965     if (cpu_virtio_is_big_endian(current_cpu)) {
1966         return VIRTIO_DEVICE_ENDIAN_BIG;
1967     } else {
1968         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1969     }
1970 }
1971 
1972 void virtio_reset(void *opaque)
1973 {
1974     VirtIODevice *vdev = opaque;
1975     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1976     int i;
1977 
1978     virtio_set_status(vdev, 0);
1979     if (current_cpu) {
1980         /* Guest initiated reset */
1981         vdev->device_endian = virtio_current_cpu_endian();
1982     } else {
1983         /* System reset */
1984         vdev->device_endian = virtio_default_endian();
1985     }
1986 
1987     if (k->reset) {
1988         k->reset(vdev);
1989     }
1990 
1991     vdev->start_on_kick = false;
1992     vdev->started = false;
1993     vdev->broken = false;
1994     vdev->guest_features = 0;
1995     vdev->queue_sel = 0;
1996     vdev->status = 0;
1997     vdev->disabled = false;
1998     qatomic_set(&vdev->isr, 0);
1999     vdev->config_vector = VIRTIO_NO_VECTOR;
2000     virtio_notify_vector(vdev, vdev->config_vector);
2001 
2002     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2003         vdev->vq[i].vring.desc = 0;
2004         vdev->vq[i].vring.avail = 0;
2005         vdev->vq[i].vring.used = 0;
2006         vdev->vq[i].last_avail_idx = 0;
2007         vdev->vq[i].shadow_avail_idx = 0;
2008         vdev->vq[i].used_idx = 0;
2009         vdev->vq[i].last_avail_wrap_counter = true;
2010         vdev->vq[i].shadow_avail_wrap_counter = true;
2011         vdev->vq[i].used_wrap_counter = true;
2012         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2013         vdev->vq[i].signalled_used = 0;
2014         vdev->vq[i].signalled_used_valid = false;
2015         vdev->vq[i].notification = true;
2016         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2017         vdev->vq[i].inuse = 0;
2018         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2019     }
2020 }
2021 
2022 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2023 {
2024     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2025     uint8_t val;
2026 
2027     if (addr + sizeof(val) > vdev->config_len) {
2028         return (uint32_t)-1;
2029     }
2030 
2031     k->get_config(vdev, vdev->config);
2032 
2033     val = ldub_p(vdev->config + addr);
2034     return val;
2035 }
2036 
2037 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2038 {
2039     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2040     uint16_t val;
2041 
2042     if (addr + sizeof(val) > vdev->config_len) {
2043         return (uint32_t)-1;
2044     }
2045 
2046     k->get_config(vdev, vdev->config);
2047 
2048     val = lduw_p(vdev->config + addr);
2049     return val;
2050 }
2051 
2052 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2053 {
2054     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2055     uint32_t val;
2056 
2057     if (addr + sizeof(val) > vdev->config_len) {
2058         return (uint32_t)-1;
2059     }
2060 
2061     k->get_config(vdev, vdev->config);
2062 
2063     val = ldl_p(vdev->config + addr);
2064     return val;
2065 }
2066 
2067 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2068 {
2069     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2070     uint8_t val = data;
2071 
2072     if (addr + sizeof(val) > vdev->config_len) {
2073         return;
2074     }
2075 
2076     stb_p(vdev->config + addr, val);
2077 
2078     if (k->set_config) {
2079         k->set_config(vdev, vdev->config);
2080     }
2081 }
2082 
2083 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2084 {
2085     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2086     uint16_t val = data;
2087 
2088     if (addr + sizeof(val) > vdev->config_len) {
2089         return;
2090     }
2091 
2092     stw_p(vdev->config + addr, val);
2093 
2094     if (k->set_config) {
2095         k->set_config(vdev, vdev->config);
2096     }
2097 }
2098 
2099 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2100 {
2101     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2102     uint32_t val = data;
2103 
2104     if (addr + sizeof(val) > vdev->config_len) {
2105         return;
2106     }
2107 
2108     stl_p(vdev->config + addr, val);
2109 
2110     if (k->set_config) {
2111         k->set_config(vdev, vdev->config);
2112     }
2113 }
2114 
2115 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2116 {
2117     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2118     uint8_t val;
2119 
2120     if (addr + sizeof(val) > vdev->config_len) {
2121         return (uint32_t)-1;
2122     }
2123 
2124     k->get_config(vdev, vdev->config);
2125 
2126     val = ldub_p(vdev->config + addr);
2127     return val;
2128 }
2129 
2130 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2131 {
2132     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2133     uint16_t val;
2134 
2135     if (addr + sizeof(val) > vdev->config_len) {
2136         return (uint32_t)-1;
2137     }
2138 
2139     k->get_config(vdev, vdev->config);
2140 
2141     val = lduw_le_p(vdev->config + addr);
2142     return val;
2143 }
2144 
2145 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2146 {
2147     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2148     uint32_t val;
2149 
2150     if (addr + sizeof(val) > vdev->config_len) {
2151         return (uint32_t)-1;
2152     }
2153 
2154     k->get_config(vdev, vdev->config);
2155 
2156     val = ldl_le_p(vdev->config + addr);
2157     return val;
2158 }
2159 
2160 void virtio_config_modern_writeb(VirtIODevice *vdev,
2161                                  uint32_t addr, uint32_t data)
2162 {
2163     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2164     uint8_t val = data;
2165 
2166     if (addr + sizeof(val) > vdev->config_len) {
2167         return;
2168     }
2169 
2170     stb_p(vdev->config + addr, val);
2171 
2172     if (k->set_config) {
2173         k->set_config(vdev, vdev->config);
2174     }
2175 }
2176 
2177 void virtio_config_modern_writew(VirtIODevice *vdev,
2178                                  uint32_t addr, uint32_t data)
2179 {
2180     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2181     uint16_t val = data;
2182 
2183     if (addr + sizeof(val) > vdev->config_len) {
2184         return;
2185     }
2186 
2187     stw_le_p(vdev->config + addr, val);
2188 
2189     if (k->set_config) {
2190         k->set_config(vdev, vdev->config);
2191     }
2192 }
2193 
2194 void virtio_config_modern_writel(VirtIODevice *vdev,
2195                                  uint32_t addr, uint32_t data)
2196 {
2197     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2198     uint32_t val = data;
2199 
2200     if (addr + sizeof(val) > vdev->config_len) {
2201         return;
2202     }
2203 
2204     stl_le_p(vdev->config + addr, val);
2205 
2206     if (k->set_config) {
2207         k->set_config(vdev, vdev->config);
2208     }
2209 }
2210 
2211 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2212 {
2213     if (!vdev->vq[n].vring.num) {
2214         return;
2215     }
2216     vdev->vq[n].vring.desc = addr;
2217     virtio_queue_update_rings(vdev, n);
2218 }
2219 
2220 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2221 {
2222     return vdev->vq[n].vring.desc;
2223 }
2224 
2225 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2226                             hwaddr avail, hwaddr used)
2227 {
2228     if (!vdev->vq[n].vring.num) {
2229         return;
2230     }
2231     vdev->vq[n].vring.desc = desc;
2232     vdev->vq[n].vring.avail = avail;
2233     vdev->vq[n].vring.used = used;
2234     virtio_init_region_cache(vdev, n);
2235 }
2236 
2237 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2238 {
2239     /* Don't allow guest to flip queue between existent and
2240      * nonexistent states, or to set it to an invalid size.
2241      */
2242     if (!!num != !!vdev->vq[n].vring.num ||
2243         num > VIRTQUEUE_MAX_SIZE ||
2244         num < 0) {
2245         return;
2246     }
2247     vdev->vq[n].vring.num = num;
2248 }
2249 
2250 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2251 {
2252     return QLIST_FIRST(&vdev->vector_queues[vector]);
2253 }
2254 
2255 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2256 {
2257     return QLIST_NEXT(vq, node);
2258 }
2259 
2260 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2261 {
2262     return vdev->vq[n].vring.num;
2263 }
2264 
2265 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2266 {
2267     return vdev->vq[n].vring.num_default;
2268 }
2269 
2270 int virtio_get_num_queues(VirtIODevice *vdev)
2271 {
2272     int i;
2273 
2274     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2275         if (!virtio_queue_get_num(vdev, i)) {
2276             break;
2277         }
2278     }
2279 
2280     return i;
2281 }
2282 
2283 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2284 {
2285     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2286     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2287 
2288     /* virtio-1 compliant devices cannot change the alignment */
2289     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2290         error_report("tried to modify queue alignment for virtio-1 device");
2291         return;
2292     }
2293     /* Check that the transport told us it was going to do this
2294      * (so a buggy transport will immediately assert rather than
2295      * silently failing to migrate this state)
2296      */
2297     assert(k->has_variable_vring_alignment);
2298 
2299     if (align) {
2300         vdev->vq[n].vring.align = align;
2301         virtio_queue_update_rings(vdev, n);
2302     }
2303 }
2304 
2305 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2306 {
2307     bool ret = false;
2308 
2309     if (vq->vring.desc && vq->handle_aio_output) {
2310         VirtIODevice *vdev = vq->vdev;
2311 
2312         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2313         ret = vq->handle_aio_output(vdev, vq);
2314 
2315         if (unlikely(vdev->start_on_kick)) {
2316             virtio_set_started(vdev, true);
2317         }
2318     }
2319 
2320     return ret;
2321 }
2322 
2323 static void virtio_queue_notify_vq(VirtQueue *vq)
2324 {
2325     if (vq->vring.desc && vq->handle_output) {
2326         VirtIODevice *vdev = vq->vdev;
2327 
2328         if (unlikely(vdev->broken)) {
2329             return;
2330         }
2331 
2332         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2333         vq->handle_output(vdev, vq);
2334 
2335         if (unlikely(vdev->start_on_kick)) {
2336             virtio_set_started(vdev, true);
2337         }
2338     }
2339 }
2340 
2341 void virtio_queue_notify(VirtIODevice *vdev, int n)
2342 {
2343     VirtQueue *vq = &vdev->vq[n];
2344 
2345     if (unlikely(!vq->vring.desc || vdev->broken)) {
2346         return;
2347     }
2348 
2349     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2350     if (vq->host_notifier_enabled) {
2351         event_notifier_set(&vq->host_notifier);
2352     } else if (vq->handle_output) {
2353         vq->handle_output(vdev, vq);
2354 
2355         if (unlikely(vdev->start_on_kick)) {
2356             virtio_set_started(vdev, true);
2357         }
2358     }
2359 }
2360 
2361 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2362 {
2363     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2364         VIRTIO_NO_VECTOR;
2365 }
2366 
2367 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2368 {
2369     VirtQueue *vq = &vdev->vq[n];
2370 
2371     if (n < VIRTIO_QUEUE_MAX) {
2372         if (vdev->vector_queues &&
2373             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2374             QLIST_REMOVE(vq, node);
2375         }
2376         vdev->vq[n].vector = vector;
2377         if (vdev->vector_queues &&
2378             vector != VIRTIO_NO_VECTOR) {
2379             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2380         }
2381     }
2382 }
2383 
2384 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2385                             VirtIOHandleOutput handle_output)
2386 {
2387     int i;
2388 
2389     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2390         if (vdev->vq[i].vring.num == 0)
2391             break;
2392     }
2393 
2394     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2395         abort();
2396 
2397     vdev->vq[i].vring.num = queue_size;
2398     vdev->vq[i].vring.num_default = queue_size;
2399     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2400     vdev->vq[i].handle_output = handle_output;
2401     vdev->vq[i].handle_aio_output = NULL;
2402     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2403                                        queue_size);
2404 
2405     return &vdev->vq[i];
2406 }
2407 
2408 void virtio_delete_queue(VirtQueue *vq)
2409 {
2410     vq->vring.num = 0;
2411     vq->vring.num_default = 0;
2412     vq->handle_output = NULL;
2413     vq->handle_aio_output = NULL;
2414     g_free(vq->used_elems);
2415     vq->used_elems = NULL;
2416     virtio_virtqueue_reset_region_cache(vq);
2417 }
2418 
2419 void virtio_del_queue(VirtIODevice *vdev, int n)
2420 {
2421     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2422         abort();
2423     }
2424 
2425     virtio_delete_queue(&vdev->vq[n]);
2426 }
2427 
2428 static void virtio_set_isr(VirtIODevice *vdev, int value)
2429 {
2430     uint8_t old = qatomic_read(&vdev->isr);
2431 
2432     /* Do not write ISR if it does not change, so that its cacheline remains
2433      * shared in the common case where the guest does not read it.
2434      */
2435     if ((old & value) != value) {
2436         qatomic_or(&vdev->isr, value);
2437     }
2438 }
2439 
2440 /* Called within rcu_read_lock(). */
2441 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2442 {
2443     uint16_t old, new;
2444     bool v;
2445     /* We need to expose used array entries before checking used event. */
2446     smp_mb();
2447     /* Always notify when queue is empty (when feature acknowledge) */
2448     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2449         !vq->inuse && virtio_queue_empty(vq)) {
2450         return true;
2451     }
2452 
2453     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2454         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2455     }
2456 
2457     v = vq->signalled_used_valid;
2458     vq->signalled_used_valid = true;
2459     old = vq->signalled_used;
2460     new = vq->signalled_used = vq->used_idx;
2461     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2462 }
2463 
2464 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2465                                     uint16_t off_wrap, uint16_t new,
2466                                     uint16_t old)
2467 {
2468     int off = off_wrap & ~(1 << 15);
2469 
2470     if (wrap != off_wrap >> 15) {
2471         off -= vq->vring.num;
2472     }
2473 
2474     return vring_need_event(off, new, old);
2475 }
2476 
2477 /* Called within rcu_read_lock(). */
2478 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2479 {
2480     VRingPackedDescEvent e;
2481     uint16_t old, new;
2482     bool v;
2483     VRingMemoryRegionCaches *caches;
2484 
2485     caches = vring_get_region_caches(vq);
2486     if (!caches) {
2487         return false;
2488     }
2489 
2490     vring_packed_event_read(vdev, &caches->avail, &e);
2491 
2492     old = vq->signalled_used;
2493     new = vq->signalled_used = vq->used_idx;
2494     v = vq->signalled_used_valid;
2495     vq->signalled_used_valid = true;
2496 
2497     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2498         return false;
2499     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2500         return true;
2501     }
2502 
2503     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2504                                          e.off_wrap, new, old);
2505 }
2506 
2507 /* Called within rcu_read_lock().  */
2508 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2509 {
2510     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2511         return virtio_packed_should_notify(vdev, vq);
2512     } else {
2513         return virtio_split_should_notify(vdev, vq);
2514     }
2515 }
2516 
2517 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2518 {
2519     WITH_RCU_READ_LOCK_GUARD() {
2520         if (!virtio_should_notify(vdev, vq)) {
2521             return;
2522         }
2523     }
2524 
2525     trace_virtio_notify_irqfd(vdev, vq);
2526 
2527     /*
2528      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2529      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2530      * incorrectly polling this bit during crashdump and hibernation
2531      * in MSI mode, causing a hang if this bit is never updated.
2532      * Recent releases of Windows do not really shut down, but rather
2533      * log out and hibernate to make the next startup faster.  Hence,
2534      * this manifested as a more serious hang during shutdown with
2535      *
2536      * Next driver release from 2016 fixed this problem, so working around it
2537      * is not a must, but it's easy to do so let's do it here.
2538      *
2539      * Note: it's safe to update ISR from any thread as it was switched
2540      * to an atomic operation.
2541      */
2542     virtio_set_isr(vq->vdev, 0x1);
2543     event_notifier_set(&vq->guest_notifier);
2544 }
2545 
2546 static void virtio_irq(VirtQueue *vq)
2547 {
2548     virtio_set_isr(vq->vdev, 0x1);
2549     virtio_notify_vector(vq->vdev, vq->vector);
2550 }
2551 
2552 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2553 {
2554     WITH_RCU_READ_LOCK_GUARD() {
2555         if (!virtio_should_notify(vdev, vq)) {
2556             return;
2557         }
2558     }
2559 
2560     trace_virtio_notify(vdev, vq);
2561     virtio_irq(vq);
2562 }
2563 
2564 void virtio_notify_config(VirtIODevice *vdev)
2565 {
2566     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2567         return;
2568 
2569     virtio_set_isr(vdev, 0x3);
2570     vdev->generation++;
2571     virtio_notify_vector(vdev, vdev->config_vector);
2572 }
2573 
2574 static bool virtio_device_endian_needed(void *opaque)
2575 {
2576     VirtIODevice *vdev = opaque;
2577 
2578     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2579     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2580         return vdev->device_endian != virtio_default_endian();
2581     }
2582     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2583     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2584 }
2585 
2586 static bool virtio_64bit_features_needed(void *opaque)
2587 {
2588     VirtIODevice *vdev = opaque;
2589 
2590     return (vdev->host_features >> 32) != 0;
2591 }
2592 
2593 static bool virtio_virtqueue_needed(void *opaque)
2594 {
2595     VirtIODevice *vdev = opaque;
2596 
2597     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2598 }
2599 
2600 static bool virtio_packed_virtqueue_needed(void *opaque)
2601 {
2602     VirtIODevice *vdev = opaque;
2603 
2604     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2605 }
2606 
2607 static bool virtio_ringsize_needed(void *opaque)
2608 {
2609     VirtIODevice *vdev = opaque;
2610     int i;
2611 
2612     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2613         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2614             return true;
2615         }
2616     }
2617     return false;
2618 }
2619 
2620 static bool virtio_extra_state_needed(void *opaque)
2621 {
2622     VirtIODevice *vdev = opaque;
2623     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2624     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2625 
2626     return k->has_extra_state &&
2627         k->has_extra_state(qbus->parent);
2628 }
2629 
2630 static bool virtio_broken_needed(void *opaque)
2631 {
2632     VirtIODevice *vdev = opaque;
2633 
2634     return vdev->broken;
2635 }
2636 
2637 static bool virtio_started_needed(void *opaque)
2638 {
2639     VirtIODevice *vdev = opaque;
2640 
2641     return vdev->started;
2642 }
2643 
2644 static bool virtio_disabled_needed(void *opaque)
2645 {
2646     VirtIODevice *vdev = opaque;
2647 
2648     return vdev->disabled;
2649 }
2650 
2651 static const VMStateDescription vmstate_virtqueue = {
2652     .name = "virtqueue_state",
2653     .version_id = 1,
2654     .minimum_version_id = 1,
2655     .fields = (VMStateField[]) {
2656         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2657         VMSTATE_UINT64(vring.used, struct VirtQueue),
2658         VMSTATE_END_OF_LIST()
2659     }
2660 };
2661 
2662 static const VMStateDescription vmstate_packed_virtqueue = {
2663     .name = "packed_virtqueue_state",
2664     .version_id = 1,
2665     .minimum_version_id = 1,
2666     .fields = (VMStateField[]) {
2667         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2668         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2669         VMSTATE_UINT16(used_idx, struct VirtQueue),
2670         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2671         VMSTATE_UINT32(inuse, struct VirtQueue),
2672         VMSTATE_END_OF_LIST()
2673     }
2674 };
2675 
2676 static const VMStateDescription vmstate_virtio_virtqueues = {
2677     .name = "virtio/virtqueues",
2678     .version_id = 1,
2679     .minimum_version_id = 1,
2680     .needed = &virtio_virtqueue_needed,
2681     .fields = (VMStateField[]) {
2682         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2683                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2684         VMSTATE_END_OF_LIST()
2685     }
2686 };
2687 
2688 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2689     .name = "virtio/packed_virtqueues",
2690     .version_id = 1,
2691     .minimum_version_id = 1,
2692     .needed = &virtio_packed_virtqueue_needed,
2693     .fields = (VMStateField[]) {
2694         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2695                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2696         VMSTATE_END_OF_LIST()
2697     }
2698 };
2699 
2700 static const VMStateDescription vmstate_ringsize = {
2701     .name = "ringsize_state",
2702     .version_id = 1,
2703     .minimum_version_id = 1,
2704     .fields = (VMStateField[]) {
2705         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2706         VMSTATE_END_OF_LIST()
2707     }
2708 };
2709 
2710 static const VMStateDescription vmstate_virtio_ringsize = {
2711     .name = "virtio/ringsize",
2712     .version_id = 1,
2713     .minimum_version_id = 1,
2714     .needed = &virtio_ringsize_needed,
2715     .fields = (VMStateField[]) {
2716         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2717                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2718         VMSTATE_END_OF_LIST()
2719     }
2720 };
2721 
2722 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2723                            const VMStateField *field)
2724 {
2725     VirtIODevice *vdev = pv;
2726     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2727     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2728 
2729     if (!k->load_extra_state) {
2730         return -1;
2731     } else {
2732         return k->load_extra_state(qbus->parent, f);
2733     }
2734 }
2735 
2736 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2737                            const VMStateField *field, JSONWriter *vmdesc)
2738 {
2739     VirtIODevice *vdev = pv;
2740     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2741     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2742 
2743     k->save_extra_state(qbus->parent, f);
2744     return 0;
2745 }
2746 
2747 static const VMStateInfo vmstate_info_extra_state = {
2748     .name = "virtqueue_extra_state",
2749     .get = get_extra_state,
2750     .put = put_extra_state,
2751 };
2752 
2753 static const VMStateDescription vmstate_virtio_extra_state = {
2754     .name = "virtio/extra_state",
2755     .version_id = 1,
2756     .minimum_version_id = 1,
2757     .needed = &virtio_extra_state_needed,
2758     .fields = (VMStateField[]) {
2759         {
2760             .name         = "extra_state",
2761             .version_id   = 0,
2762             .field_exists = NULL,
2763             .size         = 0,
2764             .info         = &vmstate_info_extra_state,
2765             .flags        = VMS_SINGLE,
2766             .offset       = 0,
2767         },
2768         VMSTATE_END_OF_LIST()
2769     }
2770 };
2771 
2772 static const VMStateDescription vmstate_virtio_device_endian = {
2773     .name = "virtio/device_endian",
2774     .version_id = 1,
2775     .minimum_version_id = 1,
2776     .needed = &virtio_device_endian_needed,
2777     .fields = (VMStateField[]) {
2778         VMSTATE_UINT8(device_endian, VirtIODevice),
2779         VMSTATE_END_OF_LIST()
2780     }
2781 };
2782 
2783 static const VMStateDescription vmstate_virtio_64bit_features = {
2784     .name = "virtio/64bit_features",
2785     .version_id = 1,
2786     .minimum_version_id = 1,
2787     .needed = &virtio_64bit_features_needed,
2788     .fields = (VMStateField[]) {
2789         VMSTATE_UINT64(guest_features, VirtIODevice),
2790         VMSTATE_END_OF_LIST()
2791     }
2792 };
2793 
2794 static const VMStateDescription vmstate_virtio_broken = {
2795     .name = "virtio/broken",
2796     .version_id = 1,
2797     .minimum_version_id = 1,
2798     .needed = &virtio_broken_needed,
2799     .fields = (VMStateField[]) {
2800         VMSTATE_BOOL(broken, VirtIODevice),
2801         VMSTATE_END_OF_LIST()
2802     }
2803 };
2804 
2805 static const VMStateDescription vmstate_virtio_started = {
2806     .name = "virtio/started",
2807     .version_id = 1,
2808     .minimum_version_id = 1,
2809     .needed = &virtio_started_needed,
2810     .fields = (VMStateField[]) {
2811         VMSTATE_BOOL(started, VirtIODevice),
2812         VMSTATE_END_OF_LIST()
2813     }
2814 };
2815 
2816 static const VMStateDescription vmstate_virtio_disabled = {
2817     .name = "virtio/disabled",
2818     .version_id = 1,
2819     .minimum_version_id = 1,
2820     .needed = &virtio_disabled_needed,
2821     .fields = (VMStateField[]) {
2822         VMSTATE_BOOL(disabled, VirtIODevice),
2823         VMSTATE_END_OF_LIST()
2824     }
2825 };
2826 
2827 static const VMStateDescription vmstate_virtio = {
2828     .name = "virtio",
2829     .version_id = 1,
2830     .minimum_version_id = 1,
2831     .minimum_version_id_old = 1,
2832     .fields = (VMStateField[]) {
2833         VMSTATE_END_OF_LIST()
2834     },
2835     .subsections = (const VMStateDescription*[]) {
2836         &vmstate_virtio_device_endian,
2837         &vmstate_virtio_64bit_features,
2838         &vmstate_virtio_virtqueues,
2839         &vmstate_virtio_ringsize,
2840         &vmstate_virtio_broken,
2841         &vmstate_virtio_extra_state,
2842         &vmstate_virtio_started,
2843         &vmstate_virtio_packed_virtqueues,
2844         &vmstate_virtio_disabled,
2845         NULL
2846     }
2847 };
2848 
2849 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2850 {
2851     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2852     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2853     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2854     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2855     int i;
2856 
2857     if (k->save_config) {
2858         k->save_config(qbus->parent, f);
2859     }
2860 
2861     qemu_put_8s(f, &vdev->status);
2862     qemu_put_8s(f, &vdev->isr);
2863     qemu_put_be16s(f, &vdev->queue_sel);
2864     qemu_put_be32s(f, &guest_features_lo);
2865     qemu_put_be32(f, vdev->config_len);
2866     qemu_put_buffer(f, vdev->config, vdev->config_len);
2867 
2868     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2869         if (vdev->vq[i].vring.num == 0)
2870             break;
2871     }
2872 
2873     qemu_put_be32(f, i);
2874 
2875     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2876         if (vdev->vq[i].vring.num == 0)
2877             break;
2878 
2879         qemu_put_be32(f, vdev->vq[i].vring.num);
2880         if (k->has_variable_vring_alignment) {
2881             qemu_put_be32(f, vdev->vq[i].vring.align);
2882         }
2883         /*
2884          * Save desc now, the rest of the ring addresses are saved in
2885          * subsections for VIRTIO-1 devices.
2886          */
2887         qemu_put_be64(f, vdev->vq[i].vring.desc);
2888         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2889         if (k->save_queue) {
2890             k->save_queue(qbus->parent, i, f);
2891         }
2892     }
2893 
2894     if (vdc->save != NULL) {
2895         vdc->save(vdev, f);
2896     }
2897 
2898     if (vdc->vmsd) {
2899         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2900         if (ret) {
2901             return ret;
2902         }
2903     }
2904 
2905     /* Subsections */
2906     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2907 }
2908 
/*
 * A wrapper for use as a VMState .put function.
 *
 * Casts @opaque to a VirtIODevice and forwards to virtio_save(); @size,
 * @field and @vmdesc are not used.  Returns the result of virtio_save().
 */
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
                              const VMStateField *field, JSONWriter *vmdesc)
{
    return virtio_save(VIRTIO_DEVICE(opaque), f);
}
2915 
/*
 * A wrapper for use as a VMState .get function.
 *
 * Casts @opaque to a VirtIODevice and forwards to virtio_load(), passing
 * the version_id of the device class's vmsd.  @size and @field are not
 * used.  Returns the result of virtio_load().
 */
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
                             const VMStateField *field)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    /* dc->vmsd is dereferenced unconditionally here. */
    return virtio_load(vdev, f, dc->vmsd->version_id);
}
2925 
/*
 * VMStateInfo named "virtio" whose get/put hooks are virtio_device_get()
 * and virtio_device_put(), i.e. wrappers around virtio_load()/virtio_save().
 */
const VMStateInfo  virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};
2931 
2932 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2933 {
2934     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2935     bool bad = (val & ~(vdev->host_features)) != 0;
2936 
2937     val &= vdev->host_features;
2938     if (k->set_features) {
2939         k->set_features(vdev, val);
2940     }
2941     vdev->guest_features = val;
2942     return bad ? -1 : 0;
2943 }
2944 
2945 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2946 {
2947     int ret;
2948     /*
2949      * The driver must not attempt to set features after feature negotiation
2950      * has finished.
2951      */
2952     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2953         return -EINVAL;
2954     }
2955     ret = virtio_set_features_nocheck(vdev, val);
2956     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2957         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2958         int i;
2959         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2960             if (vdev->vq[i].vring.num != 0) {
2961                 virtio_init_region_cache(vdev, i);
2962             }
2963         }
2964     }
2965     if (!ret) {
2966         if (!virtio_device_started(vdev, vdev->status) &&
2967             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2968             vdev->start_on_kick = true;
2969         }
2970     }
2971     return ret;
2972 }
2973 
2974 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2975                                       uint64_t host_features)
2976 {
2977     size_t config_size = 0;
2978     int i;
2979 
2980     for (i = 0; feature_sizes[i].flags != 0; i++) {
2981         if (host_features & feature_sizes[i].flags) {
2982             config_size = MAX(feature_sizes[i].end, config_size);
2983         }
2984     }
2985 
2986     return config_size;
2987 }
2988 
2989 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2990 {
2991     int i, ret;
2992     int32_t config_len;
2993     uint32_t num;
2994     uint32_t features;
2995     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2996     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2997     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2998 
2999     /*
3000      * We poison the endianness to ensure it does not get used before
3001      * subsections have been loaded.
3002      */
3003     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3004 
3005     if (k->load_config) {
3006         ret = k->load_config(qbus->parent, f);
3007         if (ret)
3008             return ret;
3009     }
3010 
3011     qemu_get_8s(f, &vdev->status);
3012     qemu_get_8s(f, &vdev->isr);
3013     qemu_get_be16s(f, &vdev->queue_sel);
3014     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3015         return -1;
3016     }
3017     qemu_get_be32s(f, &features);
3018 
3019     /*
3020      * Temporarily set guest_features low bits - needed by
3021      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3022      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3023      *
3024      * Note: devices should always test host features in future - don't create
3025      * new dependencies like this.
3026      */
3027     vdev->guest_features = features;
3028 
3029     config_len = qemu_get_be32(f);
3030 
3031     /*
3032      * There are cases where the incoming config can be bigger or smaller
3033      * than what we have; so load what we have space for, and skip
3034      * any excess that's in the stream.
3035      */
3036     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3037 
3038     while (config_len > vdev->config_len) {
3039         qemu_get_byte(f);
3040         config_len--;
3041     }
3042 
3043     num = qemu_get_be32(f);
3044 
3045     if (num > VIRTIO_QUEUE_MAX) {
3046         error_report("Invalid number of virtqueues: 0x%x", num);
3047         return -1;
3048     }
3049 
3050     for (i = 0; i < num; i++) {
3051         vdev->vq[i].vring.num = qemu_get_be32(f);
3052         if (k->has_variable_vring_alignment) {
3053             vdev->vq[i].vring.align = qemu_get_be32(f);
3054         }
3055         vdev->vq[i].vring.desc = qemu_get_be64(f);
3056         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3057         vdev->vq[i].signalled_used_valid = false;
3058         vdev->vq[i].notification = true;
3059 
3060         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3061             error_report("VQ %d address 0x0 "
3062                          "inconsistent with Host index 0x%x",
3063                          i, vdev->vq[i].last_avail_idx);
3064             return -1;
3065         }
3066         if (k->load_queue) {
3067             ret = k->load_queue(qbus->parent, i, f);
3068             if (ret)
3069                 return ret;
3070         }
3071     }
3072 
3073     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3074 
3075     if (vdc->load != NULL) {
3076         ret = vdc->load(vdev, f, version_id);
3077         if (ret) {
3078             return ret;
3079         }
3080     }
3081 
3082     if (vdc->vmsd) {
3083         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3084         if (ret) {
3085             return ret;
3086         }
3087     }
3088 
3089     /* Subsections */
3090     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3091     if (ret) {
3092         return ret;
3093     }
3094 
3095     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3096         vdev->device_endian = virtio_default_endian();
3097     }
3098 
3099     if (virtio_64bit_features_needed(vdev)) {
3100         /*
3101          * Subsection load filled vdev->guest_features.  Run them
3102          * through virtio_set_features to sanity-check them against
3103          * host_features.
3104          */
3105         uint64_t features64 = vdev->guest_features;
3106         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3107             error_report("Features 0x%" PRIx64 " unsupported. "
3108                          "Allowed features: 0x%" PRIx64,
3109                          features64, vdev->host_features);
3110             return -1;
3111         }
3112     } else {
3113         if (virtio_set_features_nocheck(vdev, features) < 0) {
3114             error_report("Features 0x%x unsupported. "
3115                          "Allowed features: 0x%" PRIx64,
3116                          features, vdev->host_features);
3117             return -1;
3118         }
3119     }
3120 
3121     if (!virtio_device_started(vdev, vdev->status) &&
3122         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3123         vdev->start_on_kick = true;
3124     }
3125 
3126     RCU_READ_LOCK_GUARD();
3127     for (i = 0; i < num; i++) {
3128         if (vdev->vq[i].vring.desc) {
3129             uint16_t nheads;
3130 
3131             /*
3132              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3133              * only the region cache needs to be set up.  Legacy devices need
3134              * to calculate used and avail ring addresses based on the desc
3135              * address.
3136              */
3137             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3138                 virtio_init_region_cache(vdev, i);
3139             } else {
3140                 virtio_queue_update_rings(vdev, i);
3141             }
3142 
3143             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3144                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3145                 vdev->vq[i].shadow_avail_wrap_counter =
3146                                         vdev->vq[i].last_avail_wrap_counter;
3147                 continue;
3148             }
3149 
3150             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3151             /* Check it isn't doing strange things with descriptor numbers. */
3152             if (nheads > vdev->vq[i].vring.num) {
3153                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3154                              "inconsistent with Host index 0x%x: delta 0x%x",
3155                              i, vdev->vq[i].vring.num,
3156                              vring_avail_idx(&vdev->vq[i]),
3157                              vdev->vq[i].last_avail_idx, nheads);
3158                 vdev->vq[i].used_idx = 0;
3159                 vdev->vq[i].shadow_avail_idx = 0;
3160                 vdev->vq[i].inuse = 0;
3161                 continue;
3162             }
3163             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3164             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3165 
3166             /*
3167              * Some devices migrate VirtQueueElements that have been popped
3168              * from the avail ring but not yet returned to the used ring.
3169              * Since max ring size < UINT16_MAX it's safe to use modulo
3170              * UINT16_MAX + 1 subtraction.
3171              */
3172             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3173                                 vdev->vq[i].used_idx);
3174             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3175                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3176                              "used_idx 0x%x",
3177                              i, vdev->vq[i].vring.num,
3178                              vdev->vq[i].last_avail_idx,
3179                              vdev->vq[i].used_idx);
3180                 return -1;
3181             }
3182         }
3183     }
3184 
3185     if (vdc->post_load) {
3186         ret = vdc->post_load(vdev);
3187         if (ret) {
3188             return ret;
3189         }
3190     }
3191 
3192     return 0;
3193 }
3194 
/*
 * Remove the VM change state handler stored in vdev->vmstate (the one
 * installed by virtio_init()).
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}
3199 
/*
 * VM change state handler; @opaque is the VirtIODevice.
 *
 * Records @running in vdev->vm_running, re-applies the current device status
 * through virtio_set_status() and forwards the change to the bus class
 * vmstate_change hook (if any).  @state is not used.
 */
static void virtio_vmstate_change(void *opaque, bool running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    /* The backend runs only when the VM runs and the device is started. */
    bool backend_run = running && virtio_device_started(vdev, vdev->status);
    vdev->vm_running = running;

    /* Going to "run": re-apply the status before notifying the bus. */
    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    /* Going to "stop": re-apply the status after notifying the bus. */
    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}
3220 
/*
 * Initialize the device object at @data (of size @vdev_size and type name
 * @vdev_name) as a child of @proxy_obj named "virtio-backend", then alias
 * all of its properties onto @proxy_obj.
 *
 * Initialization errors go to &error_abort.
 */
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
                                       vdev_size, vdev_name, &error_abort,
                                       NULL);
    qdev_alias_all_properties(vdev, proxy_obj);
}
3231 
3232 void virtio_init(VirtIODevice *vdev, const char *name,
3233                  uint16_t device_id, size_t config_size)
3234 {
3235     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3236     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3237     int i;
3238     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3239 
3240     if (nvectors) {
3241         vdev->vector_queues =
3242             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3243     }
3244 
3245     vdev->start_on_kick = false;
3246     vdev->started = false;
3247     vdev->device_id = device_id;
3248     vdev->status = 0;
3249     qatomic_set(&vdev->isr, 0);
3250     vdev->queue_sel = 0;
3251     vdev->config_vector = VIRTIO_NO_VECTOR;
3252     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3253     vdev->vm_running = runstate_is_running();
3254     vdev->broken = false;
3255     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3256         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3257         vdev->vq[i].vdev = vdev;
3258         vdev->vq[i].queue_index = i;
3259         vdev->vq[i].host_notifier_enabled = false;
3260     }
3261 
3262     vdev->name = name;
3263     vdev->config_len = config_size;
3264     if (vdev->config_len) {
3265         vdev->config = g_malloc0(config_size);
3266     } else {
3267         vdev->config = NULL;
3268     }
3269     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3270             virtio_vmstate_change, vdev);
3271     vdev->device_endian = virtio_default_endian();
3272     vdev->use_guest_notifier_mask = true;
3273 }
3274 
/*
 * Only devices that have already been around prior to defining the virtio
 * standard support legacy mode; this includes devices not specified in the
 * standard. All newer devices conform to the virtio standard only.
 *
 * Returns true if vdev->device_id is one of the IDs listed below, false
 * for any other ID.
 */
bool virtio_legacy_allowed(VirtIODevice *vdev)
{
    switch (vdev->device_id) {
    case VIRTIO_ID_NET:
    case VIRTIO_ID_BLOCK:
    case VIRTIO_ID_CONSOLE:
    case VIRTIO_ID_RNG:
    case VIRTIO_ID_BALLOON:
    case VIRTIO_ID_RPMSG:
    case VIRTIO_ID_SCSI:
    case VIRTIO_ID_9P:
    case VIRTIO_ID_RPROC_SERIAL:
    case VIRTIO_ID_CAIF:
        return true;
    default:
        return false;
    }
}
3298 
/*
 * Return the device's disable_legacy_check flag (backed by the
 * "x-disable-legacy-check" property).
 */
bool virtio_legacy_check_disabled(VirtIODevice *vdev)
{
    return vdev->disable_legacy_check;
}
3303 
/* Return the descriptor table address (vring.desc) of queue @n; @n is not range-checked. */
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}
3308 
/* Legacy notion of "enabled": queue @n has a non-zero descriptor table address. */
bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
{
    return virtio_queue_get_desc_addr(vdev, n) != 0;
}
3313 
3314 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3315 {
3316     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3317     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3318 
3319     if (k->queue_enabled) {
3320         return k->queue_enabled(qbus->parent, n);
3321     }
3322     return virtio_queue_enabled_legacy(vdev, n);
3323 }
3324 
/* Return the vring.avail address of queue @n; @n is not range-checked. */
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}
3329 
/* Return the vring.used address of queue @n; @n is not range-checked. */
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}
3334 
/*
 * Return the size in bytes of the descriptor table of queue @n:
 * sizeof(VRingDesc) times the ring size.
 */
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}
3339 
3340 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3341 {
3342     int s;
3343 
3344     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3345         return sizeof(struct VRingPackedDescEvent);
3346     }
3347 
3348     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3349     return offsetof(VRingAvail, ring) +
3350         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3351 }
3352 
3353 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3354 {
3355     int s;
3356 
3357     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3358         return sizeof(struct VRingPackedDescEvent);
3359     }
3360 
3361     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3362     return offsetof(VRingUsed, ring) +
3363         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3364 }
3365 
3366 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3367                                                            int n)
3368 {
3369     unsigned int avail, used;
3370 
3371     avail = vdev->vq[n].last_avail_idx;
3372     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3373 
3374     used = vdev->vq[n].used_idx;
3375     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3376 
3377     return avail | used << 16;
3378 }
3379 
/* Split-ring variant: return last_avail_idx of queue @n unchanged. */
static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    return vdev->vq[n].last_avail_idx;
}
3385 
3386 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3387 {
3388     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3389         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3390     } else {
3391         return virtio_queue_split_get_last_avail_idx(vdev, n);
3392     }
3393 }
3394 
3395 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3396                                                    int n, unsigned int idx)
3397 {
3398     struct VirtQueue *vq = &vdev->vq[n];
3399 
3400     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3401     vq->last_avail_wrap_counter =
3402         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3403     idx >>= 16;
3404     vq->used_idx = idx & 0x7ffff;
3405     vq->used_wrap_counter = !!(idx & 0x8000);
3406 }
3407 
3408 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3409                                                   int n, unsigned int idx)
3410 {
3411         vdev->vq[n].last_avail_idx = idx;
3412         vdev->vq[n].shadow_avail_idx = idx;
3413 }
3414 
3415 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3416                                      unsigned int idx)
3417 {
3418     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3419         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3420     } else {
3421         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3422     }
3423 }
3424 
/* Packed-ring variant of the last_avail_idx restore: intentionally a no-op. */
static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
                                                       int n)
{
    /* We don't have a reference like avail idx in shared memory */
    return;
}
3431 
3432 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3433                                                       int n)
3434 {
3435     RCU_READ_LOCK_GUARD();
3436     if (vdev->vq[n].vring.desc) {
3437         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3438         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3439     }
3440 }
3441 
3442 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3443 {
3444     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3445         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3446     } else {
3447         virtio_queue_split_restore_last_avail_idx(vdev, n);
3448     }
3449 }
3450 
/* Packed-ring variant of the used index refresh: intentionally a no-op. */
static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    /* used idx was updated through set_last_avail_idx() */
    return;
}
3456 
3457 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3458 {
3459     RCU_READ_LOCK_GUARD();
3460     if (vdev->vq[n].vring.desc) {
3461         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3462     }
3463 }
3464 
3465 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3466 {
3467     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3468         return virtio_queue_packed_update_used_idx(vdev, n);
3469     } else {
3470         return virtio_split_packed_update_used_idx(vdev, n);
3471     }
3472 }
3473 
/* Clear the signalled_used_valid flag of queue @n. */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}
3478 
3479 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3480 {
3481     return vdev->vq + n;
3482 }
3483 
/* Return the queue_index stored in @vq (assigned per queue in virtio_init()). */
uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}
3488 
3489 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3490 {
3491     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3492     if (event_notifier_test_and_clear(n)) {
3493         virtio_irq(vq);
3494     }
3495 }
3496 
3497 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3498                                                 bool with_irqfd)
3499 {
3500     if (assign && !with_irqfd) {
3501         event_notifier_set_handler(&vq->guest_notifier,
3502                                    virtio_queue_guest_notifier_read);
3503     } else {
3504         event_notifier_set_handler(&vq->guest_notifier, NULL);
3505     }
3506     if (!assign) {
3507         /* Test and clear notifier before closing it,
3508          * in case poll callback didn't have time to run. */
3509         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3510     }
3511 }
3512 
/* Return a pointer to the guest_notifier embedded in @vq. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
3517 
3518 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3519 {
3520     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3521     if (event_notifier_test_and_clear(n)) {
3522         virtio_queue_notify_aio_vq(vq);
3523     }
3524 }
3525 
/*
 * Poll-begin callback for the host notifier @n: calls
 * virtio_queue_set_notification(vq, 0) on the owning queue.
 */
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}
3532 
3533 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3534 {
3535     EventNotifier *n = opaque;
3536     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3537 
3538     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3539         return false;
3540     }
3541 
3542     return virtio_queue_notify_aio_vq(vq);
3543 }
3544 
/*
 * Poll-end callback for the host notifier @n: calls
 * virtio_queue_set_notification(vq, 1) on the owning queue.
 */
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}
3552 
3553 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3554                                                 VirtIOHandleAIOOutput handle_output)
3555 {
3556     if (handle_output) {
3557         vq->handle_aio_output = handle_output;
3558         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3559                                virtio_queue_host_notifier_aio_read,
3560                                virtio_queue_host_notifier_aio_poll);
3561         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3562                                     virtio_queue_host_notifier_aio_poll_begin,
3563                                     virtio_queue_host_notifier_aio_poll_end);
3564     } else {
3565         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3566         /* Test and clear notifier before after disabling event,
3567          * in case poll callback didn't have time to run. */
3568         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3569         vq->handle_aio_output = NULL;
3570     }
3571 }
3572 
3573 void virtio_queue_host_notifier_read(EventNotifier *n)
3574 {
3575     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3576     if (event_notifier_test_and_clear(n)) {
3577         virtio_queue_notify_vq(vq);
3578     }
3579 }
3580 
/* Return a pointer to the host_notifier embedded in @vq. */
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}
3585 
/* Record @enabled in @vq's host_notifier_enabled flag; nothing else is touched. */
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}
3590 
3591 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3592                                       MemoryRegion *mr, bool assign)
3593 {
3594     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3595     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3596 
3597     if (k->set_host_notifier_mr) {
3598         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3599     }
3600 
3601     return -1;
3602 }
3603 
/*
 * Replace vdev->bus_name with a g_strdup() copy of @bus_name; the previous
 * name is freed first.  The caller keeps ownership of @bus_name.
 */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}
3609 
3610 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3611 {
3612     va_list ap;
3613 
3614     va_start(ap, fmt);
3615     error_vreport(fmt, ap);
3616     va_end(ap);
3617 
3618     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3619         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3620         virtio_notify_config(vdev);
3621     }
3622 
3623     vdev->broken = true;
3624 }
3625 
3626 static void virtio_memory_listener_commit(MemoryListener *listener)
3627 {
3628     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3629     int i;
3630 
3631     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3632         if (vdev->vq[i].vring.num == 0) {
3633             break;
3634         }
3635         virtio_init_region_cache(vdev, i);
3636     }
3637 }
3638 
3639 static void virtio_device_realize(DeviceState *dev, Error **errp)
3640 {
3641     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3642     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3643     Error *err = NULL;
3644 
3645     /* Devices should either use vmsd or the load/save methods */
3646     assert(!vdc->vmsd || !vdc->load);
3647 
3648     if (vdc->realize != NULL) {
3649         vdc->realize(dev, &err);
3650         if (err != NULL) {
3651             error_propagate(errp, err);
3652             return;
3653         }
3654     }
3655 
3656     virtio_bus_device_plugged(vdev, &err);
3657     if (err != NULL) {
3658         error_propagate(errp, err);
3659         vdc->unrealize(dev);
3660         return;
3661     }
3662 
3663     vdev->listener.commit = virtio_memory_listener_commit;
3664     vdev->listener.name = "virtio";
3665     memory_listener_register(&vdev->listener, vdev->dma_as);
3666 }
3667 
3668 static void virtio_device_unrealize(DeviceState *dev)
3669 {
3670     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3671     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3672 
3673     memory_listener_unregister(&vdev->listener);
3674     virtio_bus_device_unplugged(vdev);
3675 
3676     if (vdc->unrealize != NULL) {
3677         vdc->unrealize(dev);
3678     }
3679 
3680     g_free(vdev->bus_name);
3681     vdev->bus_name = NULL;
3682 }
3683 
3684 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3685 {
3686     int i;
3687     if (!vdev->vq) {
3688         return;
3689     }
3690 
3691     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3692         if (vdev->vq[i].vring.num == 0) {
3693             break;
3694         }
3695         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3696     }
3697     g_free(vdev->vq);
3698 }
3699 
/*
 * QOM instance_finalize hook: frees the virtqueues, the config space buffer
 * and the vector_queues array of the device.
 */
static void virtio_device_instance_finalize(Object *obj)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(obj);

    virtio_device_free_virtqueues(vdev);

    g_free(vdev->config);
    g_free(vdev->vector_queues);
}
3709 
/*
 * qdev properties common to all virtio devices: the common feature bits
 * (stored in host_features), "use-started" and "use-disabled-flag" (both
 * default true) and "x-disable-legacy-check" (default false).
 */
static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
3718 
/*
 * Default start_ioeventfd implementation.
 *
 * Assigns a host notifier to every queue with a non-zero size, installs
 * virtio_queue_host_notifier_read() as its handler and then sets every such
 * notifier once.
 *
 * Returns 0 on success.  If assigning a notifier fails, the notifiers
 * assigned so far are rolled back and the negative value returned by
 * virtio_bus_set_host_notifier() is returned.
 */
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    /* n is the index of the queue that failed; undo queues n-1 .. 0. */
    i = n; /* save n for a second iteration after transaction is committed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    /* Second pass over the same queues, now that the transaction is done. */
    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}
3780 
3781 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3782 {
3783     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3784     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3785 
3786     return virtio_bus_start_ioeventfd(vbus);
3787 }
3788 
3789 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3790 {
3791     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3792     int n, r;
3793 
3794     /*
3795      * Batch all the host notifiers in a single transaction to avoid
3796      * quadratic time complexity in address_space_update_ioeventfds().
3797      */
3798     memory_region_transaction_begin();
3799     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3800         VirtQueue *vq = &vdev->vq[n];
3801 
3802         if (!virtio_queue_get_num(vdev, n)) {
3803             continue;
3804         }
3805         event_notifier_set_handler(&vq->host_notifier, NULL);
3806         r = virtio_bus_set_host_notifier(qbus, n, false);
3807         assert(r >= 0);
3808     }
3809     /*
3810      * The transaction expects the ioeventfds to be open when it
3811      * commits. Do it now, before the cleanup loop.
3812      */
3813     memory_region_transaction_commit();
3814 
3815     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3816         if (!virtio_queue_get_num(vdev, n)) {
3817             continue;
3818         }
3819         virtio_bus_cleanup_host_notifier(qbus, n);
3820     }
3821 }
3822 
3823 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3824 {
3825     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3826     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3827 
3828     return virtio_bus_grab_ioeventfd(vbus);
3829 }
3830 
3831 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3832 {
3833     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3834     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3835 
3836     virtio_bus_release_ioeventfd(vbus);
3837 }
3838 
/*
 * Class initializer of the virtio device base type.
 *
 * Installs the common realize/unrealize hooks, the bus type, the common
 * properties and the default ioeventfd start/stop implementations, and adds
 * VIRTIO_LEGACY_FEATURES to the class's legacy_features.  @data is not used.
 */
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    device_class_set_props(dc, virtio_properties);
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}
3854 
3855 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3856 {
3857     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3858     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3859 
3860     return virtio_bus_ioeventfd_enabled(vbus);
3861 }
3862 
/*
 * QOM type description of the abstract virtio device base type
 * (TYPE_VIRTIO_DEVICE, derived from TYPE_DEVICE).
 */
static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .instance_finalize = virtio_device_instance_finalize,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};
3872 
/* Register the virtio device base type described by virtio_device_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(virtio_register_types)
3879