xref: /openbmc/qemu/hw/virtio/virtio.c (revision d93d16c0450b3d0fe8e25568663531eb250d6aae)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
/*
 * Default alignment between the consumer and producer parts of the vring
 * (the x86 page size).  Used by transports like PCI which don't provide a
 * means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096
38 
/* Layout of one split-ring descriptor table entry. */
typedef struct VRingDesc {
    uint64_t addr;      /* address of the buffer (or of an indirect table) */
    uint32_t len;       /* length in bytes of what @addr points to */
    uint16_t flags;     /* VRING_DESC_F_* bits (NEXT, INDIRECT, ...) */
    uint16_t next;      /* index of the following descriptor if F_NEXT is set */
} VRingDesc;
46 
/* Layout of one packed-ring descriptor. */
typedef struct VRingPackedDesc
{
    uint64_t addr;      /* buffer address */
    uint32_t len;       /* buffer length / number of bytes used */
    uint16_t id;        /* element index, filled from VirtQueueElement.index */
    uint16_t flags;     /* holds the VRING_PACKED_DESC_F_AVAIL/USED bits */
} VRingPackedDesc;
53 
/*
 * Header of the split-ring available area.  ring[] has vring.num entries;
 * the slot just past them (ring[num]) is read as the used_event value.
 */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;       /* avail index published by the guest */
    uint16_t ring[];    /* descriptor head indices */
} VRingAvail;
60 
/* One entry of the split-ring used area. */
typedef struct VRingUsedElem {
    uint32_t id;        /* index of the completed element */
    uint32_t len;       /* length reported for that element */
} VRingUsedElem;
66 
67 typedef struct VRingUsed
68 {
69     uint16_t flags;
70     uint16_t idx;
71     VRingUsedElem ring[];
72 } VRingUsed;
73 
74 typedef struct VRingMemoryRegionCaches {
75     struct rcu_head rcu;
76     MemoryRegionCache desc;
77     MemoryRegionCache avail;
78     MemoryRegionCache used;
79 } VRingMemoryRegionCaches;
80 
81 typedef struct VRing
82 {
83     unsigned int num;
84     unsigned int num_default;
85     unsigned int align;
86     hwaddr desc;
87     hwaddr avail;
88     hwaddr used;
89     VRingMemoryRegionCaches *caches;
90 } VRing;
91 
/* Packed-ring event suppression structure. */
typedef struct VRingPackedDescEvent
{
    uint16_t off_wrap;  /* ring offset in the low 15 bits, wrap counter in bit 15 */
    uint16_t flags;     /* one of the VRING_PACKED_EVENT_FLAG_* values */
} VRingPackedDescEvent;
96 
97 struct VirtQueue
98 {
99     VRing vring;
100     VirtQueueElement *used_elems;
101 
102     /* Next head to pop */
103     uint16_t last_avail_idx;
104     bool last_avail_wrap_counter;
105 
106     /* Last avail_idx read from VQ. */
107     uint16_t shadow_avail_idx;
108     bool shadow_avail_wrap_counter;
109 
110     uint16_t used_idx;
111     bool used_wrap_counter;
112 
113     /* Last used index value we have signalled on */
114     uint16_t signalled_used;
115 
116     /* Last used index value we have signalled on */
117     bool signalled_used_valid;
118 
119     /* Notification enabled? */
120     bool notification;
121 
122     uint16_t queue_index;
123 
124     unsigned int inuse;
125 
126     uint16_t vector;
127     VirtIOHandleOutput handle_output;
128     VirtIOHandleOutput handle_aio_output;
129     VirtIODevice *vdev;
130     EventNotifier guest_notifier;
131     EventNotifier host_notifier;
132     bool host_notifier_enabled;
133     QLIST_ENTRY(VirtQueue) node;
134 };
135 
136 /* Called within call_rcu().  */
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139     assert(caches != NULL);
140     address_space_cache_destroy(&caches->desc);
141     address_space_cache_destroy(&caches->avail);
142     address_space_cache_destroy(&caches->used);
143     g_free(caches);
144 }
145 
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148     VRingMemoryRegionCaches *caches;
149 
150     caches = qatomic_read(&vq->vring.caches);
151     qatomic_rcu_set(&vq->vring.caches, NULL);
152     if (caches) {
153         call_rcu(caches, virtio_free_region_cache, rcu);
154     }
155 }
156 
157 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
158 {
159     VirtQueue *vq = &vdev->vq[n];
160     VRingMemoryRegionCaches *old = vq->vring.caches;
161     VRingMemoryRegionCaches *new = NULL;
162     hwaddr addr, size;
163     int64_t len;
164     bool packed;
165 
166 
167     addr = vq->vring.desc;
168     if (!addr) {
169         goto out_no_cache;
170     }
171     new = g_new0(VRingMemoryRegionCaches, 1);
172     size = virtio_queue_get_desc_size(vdev, n);
173     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
174                                    true : false;
175     len = address_space_cache_init(&new->desc, vdev->dma_as,
176                                    addr, size, packed);
177     if (len < size) {
178         virtio_error(vdev, "Cannot map desc");
179         goto err_desc;
180     }
181 
182     size = virtio_queue_get_used_size(vdev, n);
183     len = address_space_cache_init(&new->used, vdev->dma_as,
184                                    vq->vring.used, size, true);
185     if (len < size) {
186         virtio_error(vdev, "Cannot map used");
187         goto err_used;
188     }
189 
190     size = virtio_queue_get_avail_size(vdev, n);
191     len = address_space_cache_init(&new->avail, vdev->dma_as,
192                                    vq->vring.avail, size, false);
193     if (len < size) {
194         virtio_error(vdev, "Cannot map avail");
195         goto err_avail;
196     }
197 
198     qatomic_rcu_set(&vq->vring.caches, new);
199     if (old) {
200         call_rcu(old, virtio_free_region_cache, rcu);
201     }
202     return;
203 
204 err_avail:
205     address_space_cache_destroy(&new->avail);
206 err_used:
207     address_space_cache_destroy(&new->used);
208 err_desc:
209     address_space_cache_destroy(&new->desc);
210 out_no_cache:
211     g_free(new);
212     virtio_virtqueue_reset_region_cache(vq);
213 }
214 
215 /* virt queue functions */
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218     VRing *vring = &vdev->vq[n].vring;
219 
220     if (!vring->num || !vring->desc || !vring->align) {
221         /* not yet setup -> nothing to do */
222         return;
223     }
224     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225     vring->used = vring_align(vring->avail +
226                               offsetof(VRingAvail, ring[vring->num]),
227                               vring->align);
228     virtio_init_region_cache(vdev, n);
229 }
230 
231 /* Called within rcu_read_lock().  */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233                                   MemoryRegionCache *cache, int i)
234 {
235     address_space_read_cached(cache, i * sizeof(VRingDesc),
236                               desc, sizeof(VRingDesc));
237     virtio_tswap64s(vdev, &desc->addr);
238     virtio_tswap32s(vdev, &desc->len);
239     virtio_tswap16s(vdev, &desc->flags);
240     virtio_tswap16s(vdev, &desc->next);
241 }
242 
243 static void vring_packed_event_read(VirtIODevice *vdev,
244                                     MemoryRegionCache *cache,
245                                     VRingPackedDescEvent *e)
246 {
247     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
248     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
249 
250     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
251     /* Make sure flags is seen before off_wrap */
252     smp_rmb();
253     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
254     virtio_tswap16s(vdev, &e->flags);
255 }
256 
257 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
258                                         MemoryRegionCache *cache,
259                                         uint16_t off_wrap)
260 {
261     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
262 
263     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
264     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
265 }
266 
267 static void vring_packed_flags_write(VirtIODevice *vdev,
268                                      MemoryRegionCache *cache, uint16_t flags)
269 {
270     hwaddr off = offsetof(VRingPackedDescEvent, flags);
271 
272     virtio_stw_phys_cached(vdev, cache, off, flags);
273     address_space_cache_invalidate(cache, off, sizeof(flags));
274 }
275 
276 /* Called within rcu_read_lock().  */
277 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
278 {
279     return qatomic_rcu_read(&vq->vring.caches);
280 }
281 
282 /* Called within rcu_read_lock().  */
283 static inline uint16_t vring_avail_flags(VirtQueue *vq)
284 {
285     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
286     hwaddr pa = offsetof(VRingAvail, flags);
287 
288     if (!caches) {
289         return 0;
290     }
291 
292     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
293 }
294 
295 /* Called within rcu_read_lock().  */
296 static inline uint16_t vring_avail_idx(VirtQueue *vq)
297 {
298     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
299     hwaddr pa = offsetof(VRingAvail, idx);
300 
301     if (!caches) {
302         return 0;
303     }
304 
305     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
306     return vq->shadow_avail_idx;
307 }
308 
309 /* Called within rcu_read_lock().  */
310 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
311 {
312     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
313     hwaddr pa = offsetof(VRingAvail, ring[i]);
314 
315     if (!caches) {
316         return 0;
317     }
318 
319     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
320 }
321 
322 /* Called within rcu_read_lock().  */
323 static inline uint16_t vring_get_used_event(VirtQueue *vq)
324 {
325     return vring_avail_ring(vq, vq->vring.num);
326 }
327 
328 /* Called within rcu_read_lock().  */
329 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
330                                     int i)
331 {
332     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
333     hwaddr pa = offsetof(VRingUsed, ring[i]);
334 
335     if (!caches) {
336         return;
337     }
338 
339     virtio_tswap32s(vq->vdev, &uelem->id);
340     virtio_tswap32s(vq->vdev, &uelem->len);
341     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
342     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
343 }
344 
345 /* Called within rcu_read_lock().  */
346 static uint16_t vring_used_idx(VirtQueue *vq)
347 {
348     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
349     hwaddr pa = offsetof(VRingUsed, idx);
350 
351     if (!caches) {
352         return 0;
353     }
354 
355     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
356 }
357 
358 /* Called within rcu_read_lock().  */
359 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
360 {
361     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
362     hwaddr pa = offsetof(VRingUsed, idx);
363 
364     if (caches) {
365         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
366         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
367     }
368 
369     vq->used_idx = val;
370 }
371 
372 /* Called within rcu_read_lock().  */
373 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
374 {
375     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
376     VirtIODevice *vdev = vq->vdev;
377     hwaddr pa = offsetof(VRingUsed, flags);
378     uint16_t flags;
379 
380     if (!caches) {
381         return;
382     }
383 
384     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
385     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
386     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
387 }
388 
389 /* Called within rcu_read_lock().  */
390 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
391 {
392     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393     VirtIODevice *vdev = vq->vdev;
394     hwaddr pa = offsetof(VRingUsed, flags);
395     uint16_t flags;
396 
397     if (!caches) {
398         return;
399     }
400 
401     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
402     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
403     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
404 }
405 
406 /* Called within rcu_read_lock().  */
407 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
408 {
409     VRingMemoryRegionCaches *caches;
410     hwaddr pa;
411     if (!vq->notification) {
412         return;
413     }
414 
415     caches = vring_get_region_caches(vq);
416     if (!caches) {
417         return;
418     }
419 
420     pa = offsetof(VRingUsed, ring[vq->vring.num]);
421     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
422     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
423 }
424 
425 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
426 {
427     RCU_READ_LOCK_GUARD();
428 
429     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
430         vring_set_avail_event(vq, vring_avail_idx(vq));
431     } else if (enable) {
432         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
433     } else {
434         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
435     }
436     if (enable) {
437         /* Expose avail event/used flags before caller checks the avail idx. */
438         smp_mb();
439     }
440 }
441 
442 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
443 {
444     uint16_t off_wrap;
445     VRingPackedDescEvent e;
446     VRingMemoryRegionCaches *caches;
447 
448     RCU_READ_LOCK_GUARD();
449     caches = vring_get_region_caches(vq);
450     if (!caches) {
451         return;
452     }
453 
454     vring_packed_event_read(vq->vdev, &caches->used, &e);
455 
456     if (!enable) {
457         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
458     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
459         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
460         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
461         /* Make sure off_wrap is wrote before flags */
462         smp_wmb();
463         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
464     } else {
465         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
466     }
467 
468     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
469     if (enable) {
470         /* Expose avail event/used flags before caller checks the avail idx. */
471         smp_mb();
472     }
473 }
474 
475 bool virtio_queue_get_notification(VirtQueue *vq)
476 {
477     return vq->notification;
478 }
479 
480 void virtio_queue_set_notification(VirtQueue *vq, int enable)
481 {
482     vq->notification = enable;
483 
484     if (!vq->vring.desc) {
485         return;
486     }
487 
488     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
489         virtio_queue_packed_set_notification(vq, enable);
490     } else {
491         virtio_queue_split_set_notification(vq, enable);
492     }
493 }
494 
495 int virtio_queue_ready(VirtQueue *vq)
496 {
497     return vq->vring.avail != 0;
498 }
499 
500 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
501                                          uint16_t *flags,
502                                          MemoryRegionCache *cache,
503                                          int i)
504 {
505     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
506 
507     *flags = virtio_lduw_phys_cached(vdev, cache, off);
508 }
509 
510 static void vring_packed_desc_read(VirtIODevice *vdev,
511                                    VRingPackedDesc *desc,
512                                    MemoryRegionCache *cache,
513                                    int i, bool strict_order)
514 {
515     hwaddr off = i * sizeof(VRingPackedDesc);
516 
517     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
518 
519     if (strict_order) {
520         /* Make sure flags is read before the rest fields. */
521         smp_rmb();
522     }
523 
524     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
525                               &desc->addr, sizeof(desc->addr));
526     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
527                               &desc->id, sizeof(desc->id));
528     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
529                               &desc->len, sizeof(desc->len));
530     virtio_tswap64s(vdev, &desc->addr);
531     virtio_tswap16s(vdev, &desc->id);
532     virtio_tswap32s(vdev, &desc->len);
533 }
534 
535 static void vring_packed_desc_write_data(VirtIODevice *vdev,
536                                          VRingPackedDesc *desc,
537                                          MemoryRegionCache *cache,
538                                          int i)
539 {
540     hwaddr off_id = i * sizeof(VRingPackedDesc) +
541                     offsetof(VRingPackedDesc, id);
542     hwaddr off_len = i * sizeof(VRingPackedDesc) +
543                     offsetof(VRingPackedDesc, len);
544 
545     virtio_tswap32s(vdev, &desc->len);
546     virtio_tswap16s(vdev, &desc->id);
547     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
548     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
549     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
550     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
551 }
552 
553 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
554                                           VRingPackedDesc *desc,
555                                           MemoryRegionCache *cache,
556                                           int i)
557 {
558     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
559 
560     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
561     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
562 }
563 
564 static void vring_packed_desc_write(VirtIODevice *vdev,
565                                     VRingPackedDesc *desc,
566                                     MemoryRegionCache *cache,
567                                     int i, bool strict_order)
568 {
569     vring_packed_desc_write_data(vdev, desc, cache, i);
570     if (strict_order) {
571         /* Make sure data is wrote before flags. */
572         smp_wmb();
573     }
574     vring_packed_desc_write_flags(vdev, desc, cache, i);
575 }
576 
577 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
578 {
579     bool avail, used;
580 
581     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
582     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
583     return (avail != used) && (avail == wrap_counter);
584 }
585 
586 /* Fetch avail_idx from VQ memory only when we really need to know if
587  * guest has added some buffers.
588  * Called within rcu_read_lock().  */
589 static int virtio_queue_empty_rcu(VirtQueue *vq)
590 {
591     if (virtio_device_disabled(vq->vdev)) {
592         return 1;
593     }
594 
595     if (unlikely(!vq->vring.avail)) {
596         return 1;
597     }
598 
599     if (vq->shadow_avail_idx != vq->last_avail_idx) {
600         return 0;
601     }
602 
603     return vring_avail_idx(vq) == vq->last_avail_idx;
604 }
605 
606 static int virtio_queue_split_empty(VirtQueue *vq)
607 {
608     bool empty;
609 
610     if (virtio_device_disabled(vq->vdev)) {
611         return 1;
612     }
613 
614     if (unlikely(!vq->vring.avail)) {
615         return 1;
616     }
617 
618     if (vq->shadow_avail_idx != vq->last_avail_idx) {
619         return 0;
620     }
621 
622     RCU_READ_LOCK_GUARD();
623     empty = vring_avail_idx(vq) == vq->last_avail_idx;
624     return empty;
625 }
626 
627 /* Called within rcu_read_lock().  */
628 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
629 {
630     struct VRingPackedDesc desc;
631     VRingMemoryRegionCaches *cache;
632 
633     if (unlikely(!vq->vring.desc)) {
634         return 1;
635     }
636 
637     cache = vring_get_region_caches(vq);
638     if (!cache) {
639         return 1;
640     }
641 
642     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
643                                  vq->last_avail_idx);
644 
645     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
646 }
647 
648 static int virtio_queue_packed_empty(VirtQueue *vq)
649 {
650     RCU_READ_LOCK_GUARD();
651     return virtio_queue_packed_empty_rcu(vq);
652 }
653 
654 int virtio_queue_empty(VirtQueue *vq)
655 {
656     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
657         return virtio_queue_packed_empty(vq);
658     } else {
659         return virtio_queue_split_empty(vq);
660     }
661 }
662 
663 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
664                                unsigned int len)
665 {
666     AddressSpace *dma_as = vq->vdev->dma_as;
667     unsigned int offset;
668     int i;
669 
670     offset = 0;
671     for (i = 0; i < elem->in_num; i++) {
672         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
673 
674         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
675                          elem->in_sg[i].iov_len,
676                          DMA_DIRECTION_FROM_DEVICE, size);
677 
678         offset += size;
679     }
680 
681     for (i = 0; i < elem->out_num; i++)
682         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
683                          elem->out_sg[i].iov_len,
684                          DMA_DIRECTION_TO_DEVICE,
685                          elem->out_sg[i].iov_len);
686 }
687 
688 /* virtqueue_detach_element:
689  * @vq: The #VirtQueue
690  * @elem: The #VirtQueueElement
691  * @len: number of bytes written
692  *
693  * Detach the element from the virtqueue.  This function is suitable for device
694  * reset or other situations where a #VirtQueueElement is simply freed and will
695  * not be pushed or discarded.
696  */
697 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
698                               unsigned int len)
699 {
700     vq->inuse -= elem->ndescs;
701     virtqueue_unmap_sg(vq, elem, len);
702 }
703 
704 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
705 {
706     vq->last_avail_idx -= num;
707 }
708 
709 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
710 {
711     if (vq->last_avail_idx < num) {
712         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
713         vq->last_avail_wrap_counter ^= 1;
714     } else {
715         vq->last_avail_idx -= num;
716     }
717 }
718 
719 /* virtqueue_unpop:
720  * @vq: The #VirtQueue
721  * @elem: The #VirtQueueElement
722  * @len: number of bytes written
723  *
724  * Pretend the most recent element wasn't popped from the virtqueue.  The next
725  * call to virtqueue_pop() will refetch the element.
726  */
727 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
728                      unsigned int len)
729 {
730 
731     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
732         virtqueue_packed_rewind(vq, 1);
733     } else {
734         virtqueue_split_rewind(vq, 1);
735     }
736 
737     virtqueue_detach_element(vq, elem, len);
738 }
739 
740 /* virtqueue_rewind:
741  * @vq: The #VirtQueue
742  * @num: Number of elements to push back
743  *
744  * Pretend that elements weren't popped from the virtqueue.  The next
745  * virtqueue_pop() will refetch the oldest element.
746  *
747  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
748  *
749  * Returns: true on success, false if @num is greater than the number of in use
750  * elements.
751  */
752 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
753 {
754     if (num > vq->inuse) {
755         return false;
756     }
757 
758     vq->inuse -= num;
759     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
760         virtqueue_packed_rewind(vq, num);
761     } else {
762         virtqueue_split_rewind(vq, num);
763     }
764     return true;
765 }
766 
767 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
768                     unsigned int len, unsigned int idx)
769 {
770     VRingUsedElem uelem;
771 
772     if (unlikely(!vq->vring.used)) {
773         return;
774     }
775 
776     idx = (idx + vq->used_idx) % vq->vring.num;
777 
778     uelem.id = elem->index;
779     uelem.len = len;
780     vring_used_write(vq, &uelem, idx);
781 }
782 
783 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
784                                   unsigned int len, unsigned int idx)
785 {
786     vq->used_elems[idx].index = elem->index;
787     vq->used_elems[idx].len = len;
788     vq->used_elems[idx].ndescs = elem->ndescs;
789 }
790 
791 static void virtqueue_packed_fill_desc(VirtQueue *vq,
792                                        const VirtQueueElement *elem,
793                                        unsigned int idx,
794                                        bool strict_order)
795 {
796     uint16_t head;
797     VRingMemoryRegionCaches *caches;
798     VRingPackedDesc desc = {
799         .id = elem->index,
800         .len = elem->len,
801     };
802     bool wrap_counter = vq->used_wrap_counter;
803 
804     if (unlikely(!vq->vring.desc)) {
805         return;
806     }
807 
808     head = vq->used_idx + idx;
809     if (head >= vq->vring.num) {
810         head -= vq->vring.num;
811         wrap_counter ^= 1;
812     }
813     if (wrap_counter) {
814         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
815         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
816     } else {
817         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
818         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
819     }
820 
821     caches = vring_get_region_caches(vq);
822     if (!caches) {
823         return;
824     }
825 
826     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
827 }
828 
829 /* Called within rcu_read_lock().  */
830 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
831                     unsigned int len, unsigned int idx)
832 {
833     trace_virtqueue_fill(vq, elem, len, idx);
834 
835     virtqueue_unmap_sg(vq, elem, len);
836 
837     if (virtio_device_disabled(vq->vdev)) {
838         return;
839     }
840 
841     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
842         virtqueue_packed_fill(vq, elem, len, idx);
843     } else {
844         virtqueue_split_fill(vq, elem, len, idx);
845     }
846 }
847 
848 /* Called within rcu_read_lock().  */
849 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
850 {
851     uint16_t old, new;
852 
853     if (unlikely(!vq->vring.used)) {
854         return;
855     }
856 
857     /* Make sure buffer is written before we update index. */
858     smp_wmb();
859     trace_virtqueue_flush(vq, count);
860     old = vq->used_idx;
861     new = old + count;
862     vring_used_idx_set(vq, new);
863     vq->inuse -= count;
864     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
865         vq->signalled_used_valid = false;
866 }
867 
868 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
869 {
870     unsigned int i, ndescs = 0;
871 
872     if (unlikely(!vq->vring.desc)) {
873         return;
874     }
875 
876     for (i = 1; i < count; i++) {
877         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
878         ndescs += vq->used_elems[i].ndescs;
879     }
880     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
881     ndescs += vq->used_elems[0].ndescs;
882 
883     vq->inuse -= ndescs;
884     vq->used_idx += ndescs;
885     if (vq->used_idx >= vq->vring.num) {
886         vq->used_idx -= vq->vring.num;
887         vq->used_wrap_counter ^= 1;
888         vq->signalled_used_valid = false;
889     }
890 }
891 
892 void virtqueue_flush(VirtQueue *vq, unsigned int count)
893 {
894     if (virtio_device_disabled(vq->vdev)) {
895         vq->inuse -= count;
896         return;
897     }
898 
899     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
900         virtqueue_packed_flush(vq, count);
901     } else {
902         virtqueue_split_flush(vq, count);
903     }
904 }
905 
906 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
907                     unsigned int len)
908 {
909     RCU_READ_LOCK_GUARD();
910     virtqueue_fill(vq, elem, len, 0);
911     virtqueue_flush(vq, 1);
912 }
913 
914 /* Called within rcu_read_lock().  */
915 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
916 {
917     uint16_t num_heads = vring_avail_idx(vq) - idx;
918 
919     /* Check it isn't doing very strange things with descriptor numbers. */
920     if (num_heads > vq->vring.num) {
921         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
922                      idx, vq->shadow_avail_idx);
923         return -EINVAL;
924     }
925     /* On success, callers read a descriptor at vq->last_avail_idx.
926      * Make sure descriptor read does not bypass avail index read. */
927     if (num_heads) {
928         smp_rmb();
929     }
930 
931     return num_heads;
932 }
933 
934 /* Called within rcu_read_lock().  */
935 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
936                                unsigned int *head)
937 {
938     /* Grab the next descriptor number they're advertising, and increment
939      * the index we've seen. */
940     *head = vring_avail_ring(vq, idx % vq->vring.num);
941 
942     /* If their number is silly, that's a fatal mistake. */
943     if (*head >= vq->vring.num) {
944         virtio_error(vq->vdev, "Guest says index %u is available", *head);
945         return false;
946     }
947 
948     return true;
949 }
950 
/* Result codes of the "read next descriptor" helpers. */
enum {
    /* the chain is malformed */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
956 
957 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
958                                           MemoryRegionCache *desc_cache,
959                                           unsigned int max, unsigned int *next)
960 {
961     /* If this descriptor says it doesn't chain, we're done. */
962     if (!(desc->flags & VRING_DESC_F_NEXT)) {
963         return VIRTQUEUE_READ_DESC_DONE;
964     }
965 
966     /* Check they're not leading us off end of descriptors. */
967     *next = desc->next;
968     /* Make sure compiler knows to grab that: we don't want it changing! */
969     smp_wmb();
970 
971     if (*next >= max) {
972         virtio_error(vdev, "Desc next is %u", *next);
973         return VIRTQUEUE_READ_DESC_ERROR;
974     }
975 
976     vring_split_desc_read(vdev, desc, desc_cache, *next);
977     return VIRTQUEUE_READ_DESC_MORE;
978 }
979 
980 /* Called within rcu_read_lock().  */
981 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
982                             unsigned int *in_bytes, unsigned int *out_bytes,
983                             unsigned max_in_bytes, unsigned max_out_bytes,
984                             VRingMemoryRegionCaches *caches)
985 {
986     VirtIODevice *vdev = vq->vdev;
987     unsigned int max, idx;
988     unsigned int total_bufs, in_total, out_total;
989     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
990     int64_t len = 0;
991     int rc;
992 
993     idx = vq->last_avail_idx;
994     total_bufs = in_total = out_total = 0;
995 
996     max = vq->vring.num;
997 
998     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
999         MemoryRegionCache *desc_cache = &caches->desc;
1000         unsigned int num_bufs;
1001         VRingDesc desc;
1002         unsigned int i;
1003 
1004         num_bufs = total_bufs;
1005 
1006         if (!virtqueue_get_head(vq, idx++, &i)) {
1007             goto err;
1008         }
1009 
1010         vring_split_desc_read(vdev, &desc, desc_cache, i);
1011 
1012         if (desc.flags & VRING_DESC_F_INDIRECT) {
1013             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1014                 virtio_error(vdev, "Invalid size for indirect buffer table");
1015                 goto err;
1016             }
1017 
1018             /* If we've got too many, that implies a descriptor loop. */
1019             if (num_bufs >= max) {
1020                 virtio_error(vdev, "Looped descriptor");
1021                 goto err;
1022             }
1023 
1024             /* loop over the indirect descriptor table */
1025             len = address_space_cache_init(&indirect_desc_cache,
1026                                            vdev->dma_as,
1027                                            desc.addr, desc.len, false);
1028             desc_cache = &indirect_desc_cache;
1029             if (len < desc.len) {
1030                 virtio_error(vdev, "Cannot map indirect buffer");
1031                 goto err;
1032             }
1033 
1034             max = desc.len / sizeof(VRingDesc);
1035             num_bufs = i = 0;
1036             vring_split_desc_read(vdev, &desc, desc_cache, i);
1037         }
1038 
1039         do {
1040             /* If we've got too many, that implies a descriptor loop. */
1041             if (++num_bufs > max) {
1042                 virtio_error(vdev, "Looped descriptor");
1043                 goto err;
1044             }
1045 
1046             if (desc.flags & VRING_DESC_F_WRITE) {
1047                 in_total += desc.len;
1048             } else {
1049                 out_total += desc.len;
1050             }
1051             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1052                 goto done;
1053             }
1054 
1055             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1056         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1057 
1058         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1059             goto err;
1060         }
1061 
1062         if (desc_cache == &indirect_desc_cache) {
1063             address_space_cache_destroy(&indirect_desc_cache);
1064             total_bufs++;
1065         } else {
1066             total_bufs = num_bufs;
1067         }
1068     }
1069 
1070     if (rc < 0) {
1071         goto err;
1072     }
1073 
1074 done:
1075     address_space_cache_destroy(&indirect_desc_cache);
1076     if (in_bytes) {
1077         *in_bytes = in_total;
1078     }
1079     if (out_bytes) {
1080         *out_bytes = out_total;
1081     }
1082     return;
1083 
1084 err:
1085     in_total = out_total = 0;
1086     goto done;
1087 }
1088 
1089 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1090                                            VRingPackedDesc *desc,
1091                                            MemoryRegionCache
1092                                            *desc_cache,
1093                                            unsigned int max,
1094                                            unsigned int *next,
1095                                            bool indirect)
1096 {
1097     /* If this descriptor says it doesn't chain, we're done. */
1098     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1099         return VIRTQUEUE_READ_DESC_DONE;
1100     }
1101 
1102     ++*next;
1103     if (*next == max) {
1104         if (indirect) {
1105             return VIRTQUEUE_READ_DESC_DONE;
1106         } else {
1107             (*next) -= vq->vring.num;
1108         }
1109     }
1110 
1111     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1112     return VIRTQUEUE_READ_DESC_MORE;
1113 }
1114 
1115 /* Called within rcu_read_lock().  */
1116 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1117                                              unsigned int *in_bytes,
1118                                              unsigned int *out_bytes,
1119                                              unsigned max_in_bytes,
1120                                              unsigned max_out_bytes,
1121                                              VRingMemoryRegionCaches *caches)
1122 {
1123     VirtIODevice *vdev = vq->vdev;
1124     unsigned int max, idx;
1125     unsigned int total_bufs, in_total, out_total;
1126     MemoryRegionCache *desc_cache;
1127     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1128     int64_t len = 0;
1129     VRingPackedDesc desc;
1130     bool wrap_counter;
1131 
1132     idx = vq->last_avail_idx;
1133     wrap_counter = vq->last_avail_wrap_counter;
1134     total_bufs = in_total = out_total = 0;
1135 
1136     max = vq->vring.num;
1137 
1138     for (;;) {
1139         unsigned int num_bufs = total_bufs;
1140         unsigned int i = idx;
1141         int rc;
1142 
1143         desc_cache = &caches->desc;
1144         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1145         if (!is_desc_avail(desc.flags, wrap_counter)) {
1146             break;
1147         }
1148 
1149         if (desc.flags & VRING_DESC_F_INDIRECT) {
1150             if (desc.len % sizeof(VRingPackedDesc)) {
1151                 virtio_error(vdev, "Invalid size for indirect buffer table");
1152                 goto err;
1153             }
1154 
1155             /* If we've got too many, that implies a descriptor loop. */
1156             if (num_bufs >= max) {
1157                 virtio_error(vdev, "Looped descriptor");
1158                 goto err;
1159             }
1160 
1161             /* loop over the indirect descriptor table */
1162             len = address_space_cache_init(&indirect_desc_cache,
1163                                            vdev->dma_as,
1164                                            desc.addr, desc.len, false);
1165             desc_cache = &indirect_desc_cache;
1166             if (len < desc.len) {
1167                 virtio_error(vdev, "Cannot map indirect buffer");
1168                 goto err;
1169             }
1170 
1171             max = desc.len / sizeof(VRingPackedDesc);
1172             num_bufs = i = 0;
1173             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1174         }
1175 
1176         do {
1177             /* If we've got too many, that implies a descriptor loop. */
1178             if (++num_bufs > max) {
1179                 virtio_error(vdev, "Looped descriptor");
1180                 goto err;
1181             }
1182 
1183             if (desc.flags & VRING_DESC_F_WRITE) {
1184                 in_total += desc.len;
1185             } else {
1186                 out_total += desc.len;
1187             }
1188             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1189                 goto done;
1190             }
1191 
1192             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1193                                                  &i, desc_cache ==
1194                                                  &indirect_desc_cache);
1195         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1196 
1197         if (desc_cache == &indirect_desc_cache) {
1198             address_space_cache_destroy(&indirect_desc_cache);
1199             total_bufs++;
1200             idx++;
1201         } else {
1202             idx += num_bufs - total_bufs;
1203             total_bufs = num_bufs;
1204         }
1205 
1206         if (idx >= vq->vring.num) {
1207             idx -= vq->vring.num;
1208             wrap_counter ^= 1;
1209         }
1210     }
1211 
1212     /* Record the index and wrap counter for a kick we want */
1213     vq->shadow_avail_idx = idx;
1214     vq->shadow_avail_wrap_counter = wrap_counter;
1215 done:
1216     address_space_cache_destroy(&indirect_desc_cache);
1217     if (in_bytes) {
1218         *in_bytes = in_total;
1219     }
1220     if (out_bytes) {
1221         *out_bytes = out_total;
1222     }
1223     return;
1224 
1225 err:
1226     in_total = out_total = 0;
1227     goto done;
1228 }
1229 
1230 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1231                                unsigned int *out_bytes,
1232                                unsigned max_in_bytes, unsigned max_out_bytes)
1233 {
1234     uint16_t desc_size;
1235     VRingMemoryRegionCaches *caches;
1236 
1237     RCU_READ_LOCK_GUARD();
1238 
1239     if (unlikely(!vq->vring.desc)) {
1240         goto err;
1241     }
1242 
1243     caches = vring_get_region_caches(vq);
1244     if (!caches) {
1245         goto err;
1246     }
1247 
1248     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1249                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1250     if (caches->desc.len < vq->vring.num * desc_size) {
1251         virtio_error(vq->vdev, "Cannot map descriptor ring");
1252         goto err;
1253     }
1254 
1255     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1256         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1257                                          max_in_bytes, max_out_bytes,
1258                                          caches);
1259     } else {
1260         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1261                                         max_in_bytes, max_out_bytes,
1262                                         caches);
1263     }
1264 
1265     return;
1266 err:
1267     if (in_bytes) {
1268         *in_bytes = 0;
1269     }
1270     if (out_bytes) {
1271         *out_bytes = 0;
1272     }
1273 }
1274 
1275 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1276                           unsigned int out_bytes)
1277 {
1278     unsigned int in_total, out_total;
1279 
1280     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1281     return in_bytes <= in_total && out_bytes <= out_total;
1282 }
1283 
1284 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1285                                hwaddr *addr, struct iovec *iov,
1286                                unsigned int max_num_sg, bool is_write,
1287                                hwaddr pa, size_t sz)
1288 {
1289     bool ok = false;
1290     unsigned num_sg = *p_num_sg;
1291     assert(num_sg <= max_num_sg);
1292 
1293     if (!sz) {
1294         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1295         goto out;
1296     }
1297 
1298     while (sz) {
1299         hwaddr len = sz;
1300 
1301         if (num_sg == max_num_sg) {
1302             virtio_error(vdev, "virtio: too many write descriptors in "
1303                                "indirect table");
1304             goto out;
1305         }
1306 
1307         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1308                                               is_write ?
1309                                               DMA_DIRECTION_FROM_DEVICE :
1310                                               DMA_DIRECTION_TO_DEVICE,
1311                                               MEMTXATTRS_UNSPECIFIED);
1312         if (!iov[num_sg].iov_base) {
1313             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1314             goto out;
1315         }
1316 
1317         iov[num_sg].iov_len = len;
1318         addr[num_sg] = pa;
1319 
1320         sz -= len;
1321         pa += len;
1322         num_sg++;
1323     }
1324     ok = true;
1325 
1326 out:
1327     *p_num_sg = num_sg;
1328     return ok;
1329 }
1330 
/*
 * Unmap the first @out_num + @in_num entries of @iov.  Entries from index
 * @out_num onwards are unmapped as writable mappings, the earlier ones as
 * read-only mappings; an access length of 0 is reported for all of them.
 *
 * Only used by error code paths before we have a VirtQueueElement
 * (therefore virtqueue_unmap_sg() can't be used).  Assumes buffers weren't
 * written to yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    const unsigned int total = out_num + in_num;
    unsigned int n;

    for (n = 0; n < total; n++) {
        int writable = n >= out_num;

        cpu_physical_memory_unmap(iov[n].iov_base, iov[n].iov_len,
                                  writable, 0);
    }
}
1347 
1348 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1349                                 hwaddr *addr, unsigned int num_sg,
1350                                 bool is_write)
1351 {
1352     unsigned int i;
1353     hwaddr len;
1354 
1355     for (i = 0; i < num_sg; i++) {
1356         len = sg[i].iov_len;
1357         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1358                                         addr[i], &len, is_write ?
1359                                         DMA_DIRECTION_FROM_DEVICE :
1360                                         DMA_DIRECTION_TO_DEVICE,
1361                                         MEMTXATTRS_UNSPECIFIED);
1362         if (!sg[i].iov_base) {
1363             error_report("virtio: error trying to map MMIO memory");
1364             exit(1);
1365         }
1366         if (len != sg[i].iov_len) {
1367             error_report("virtio: unexpected memory split");
1368             exit(1);
1369         }
1370     }
1371 }
1372 
/*
 * Map every scatter-gather entry of @elem: the in_sg entries are mapped as
 * device-writable, the out_sg entries as device-readable.  The guest
 * addresses are taken from elem->in_addr / elem->out_addr and the lengths
 * from the iov_len fields already present in the sg arrays.
 */
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
                                                                        false);
}
1379 
/*
 * Allocate a virtqueue element with room for @out_num out-entries and
 * @in_num in-entries in a single g_malloc() block.
 *
 * @sz is the size of the caller's element structure, which must be at
 * least sizeof(VirtQueueElement).  The four variable-length arrays are
 * placed after those @sz bytes in the order in_addr, out_addr, in_sg,
 * out_sg, each group aligned for its element type.
 *
 * Only out_num, in_num and the four array pointers are initialized here;
 * every other field (and the arrays' contents) is left for the caller to
 * fill in.
 */
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    /* Offsets, from the start of the allocation, of each trailing array. */
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    /* out_sg_end is the total size: header plus all four arrays. */
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
1401 
/*
 * Pop the next available descriptor chain from a split virtqueue.
 *
 * The chain (direct, or a single indirect table) is walked, every buffer
 * is DMA-mapped, and the result is returned as a freshly allocated element
 * of @sz bytes (see virtqueue_alloc_element()) with the out entries first
 * and the in entries after them.
 *
 * Returns NULL when the queue is empty or when the ring is found to be
 * malformed; in the latter case virtio_error() has been called and any
 * mappings made so far have been undone.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    /* Staging area: out entries first, in entries right after them. */
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    /* More elements in flight than ring slots cannot be legitimate. */
    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    /* Note: last_avail_idx is advanced even if the head is rejected. */
    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    /* The cache must span the whole descriptor table. */
    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        /* The table must be non-empty and a whole number of descriptors. */
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* From here on the chain is bounded by the indirect table. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* In entries are staged after the out entries. */
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* An out descriptor may not follow an in descriptor. */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    /* A split-ring element is always accounted as one descriptor. */
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Reached on every path, including ones that never set up the cache. */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    /* Release whatever was mapped before the error; elem is still NULL. */
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
1538 
1539 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1540 {
1541     unsigned int i, max;
1542     VRingMemoryRegionCaches *caches;
1543     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1544     MemoryRegionCache *desc_cache;
1545     int64_t len;
1546     VirtIODevice *vdev = vq->vdev;
1547     VirtQueueElement *elem = NULL;
1548     unsigned out_num, in_num, elem_entries;
1549     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1550     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1551     VRingPackedDesc desc;
1552     uint16_t id;
1553     int rc;
1554 
1555     RCU_READ_LOCK_GUARD();
1556     if (virtio_queue_packed_empty_rcu(vq)) {
1557         goto done;
1558     }
1559 
1560     /* When we start there are none of either input nor output. */
1561     out_num = in_num = elem_entries = 0;
1562 
1563     max = vq->vring.num;
1564 
1565     if (vq->inuse >= vq->vring.num) {
1566         virtio_error(vdev, "Virtqueue size exceeded");
1567         goto done;
1568     }
1569 
1570     i = vq->last_avail_idx;
1571 
1572     caches = vring_get_region_caches(vq);
1573     if (!caches) {
1574         virtio_error(vdev, "Region caches not initialized");
1575         goto done;
1576     }
1577 
1578     if (caches->desc.len < max * sizeof(VRingDesc)) {
1579         virtio_error(vdev, "Cannot map descriptor ring");
1580         goto done;
1581     }
1582 
1583     desc_cache = &caches->desc;
1584     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1585     id = desc.id;
1586     if (desc.flags & VRING_DESC_F_INDIRECT) {
1587         if (desc.len % sizeof(VRingPackedDesc)) {
1588             virtio_error(vdev, "Invalid size for indirect buffer table");
1589             goto done;
1590         }
1591 
1592         /* loop over the indirect descriptor table */
1593         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1594                                        desc.addr, desc.len, false);
1595         desc_cache = &indirect_desc_cache;
1596         if (len < desc.len) {
1597             virtio_error(vdev, "Cannot map indirect buffer");
1598             goto done;
1599         }
1600 
1601         max = desc.len / sizeof(VRingPackedDesc);
1602         i = 0;
1603         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1604     }
1605 
1606     /* Collect all the descriptors */
1607     do {
1608         bool map_ok;
1609 
1610         if (desc.flags & VRING_DESC_F_WRITE) {
1611             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1612                                         iov + out_num,
1613                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1614                                         desc.addr, desc.len);
1615         } else {
1616             if (in_num) {
1617                 virtio_error(vdev, "Incorrect order for descriptors");
1618                 goto err_undo_map;
1619             }
1620             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1621                                         VIRTQUEUE_MAX_SIZE, false,
1622                                         desc.addr, desc.len);
1623         }
1624         if (!map_ok) {
1625             goto err_undo_map;
1626         }
1627 
1628         /* If we've got too many, that implies a descriptor loop. */
1629         if (++elem_entries > max) {
1630             virtio_error(vdev, "Looped descriptor");
1631             goto err_undo_map;
1632         }
1633 
1634         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1635                                              desc_cache ==
1636                                              &indirect_desc_cache);
1637     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1638 
1639     /* Now copy what we have collected and mapped */
1640     elem = virtqueue_alloc_element(sz, out_num, in_num);
1641     for (i = 0; i < out_num; i++) {
1642         elem->out_addr[i] = addr[i];
1643         elem->out_sg[i] = iov[i];
1644     }
1645     for (i = 0; i < in_num; i++) {
1646         elem->in_addr[i] = addr[out_num + i];
1647         elem->in_sg[i] = iov[out_num + i];
1648     }
1649 
1650     elem->index = id;
1651     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1652     vq->last_avail_idx += elem->ndescs;
1653     vq->inuse += elem->ndescs;
1654 
1655     if (vq->last_avail_idx >= vq->vring.num) {
1656         vq->last_avail_idx -= vq->vring.num;
1657         vq->last_avail_wrap_counter ^= 1;
1658     }
1659 
1660     vq->shadow_avail_idx = vq->last_avail_idx;
1661     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1662 
1663     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1664 done:
1665     address_space_cache_destroy(&indirect_desc_cache);
1666 
1667     return elem;
1668 
1669 err_undo_map:
1670     virtqueue_undo_map_desc(out_num, in_num, iov);
1671     goto done;
1672 }
1673 
1674 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1675 {
1676     if (virtio_device_disabled(vq->vdev)) {
1677         return NULL;
1678     }
1679 
1680     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1681         return virtqueue_packed_pop(vq, sz);
1682     } else {
1683         return virtqueue_split_pop(vq, sz);
1684     }
1685 }
1686 
1687 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1688 {
1689     VRingMemoryRegionCaches *caches;
1690     MemoryRegionCache *desc_cache;
1691     unsigned int dropped = 0;
1692     VirtQueueElement elem = {};
1693     VirtIODevice *vdev = vq->vdev;
1694     VRingPackedDesc desc;
1695 
1696     RCU_READ_LOCK_GUARD();
1697 
1698     caches = vring_get_region_caches(vq);
1699     if (!caches) {
1700         return 0;
1701     }
1702 
1703     desc_cache = &caches->desc;
1704 
1705     virtio_queue_set_notification(vq, 0);
1706 
1707     while (vq->inuse < vq->vring.num) {
1708         unsigned int idx = vq->last_avail_idx;
1709         /*
1710          * works similar to virtqueue_pop but does not map buffers
1711          * and does not allocate any memory.
1712          */
1713         vring_packed_desc_read(vdev, &desc, desc_cache,
1714                                vq->last_avail_idx , true);
1715         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1716             break;
1717         }
1718         elem.index = desc.id;
1719         elem.ndescs = 1;
1720         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1721                                                vq->vring.num, &idx, false)) {
1722             ++elem.ndescs;
1723         }
1724         /*
1725          * immediately push the element, nothing to unmap
1726          * as both in_num and out_num are set to 0.
1727          */
1728         virtqueue_push(vq, &elem, 0);
1729         dropped++;
1730         vq->last_avail_idx += elem.ndescs;
1731         if (vq->last_avail_idx >= vq->vring.num) {
1732             vq->last_avail_idx -= vq->vring.num;
1733             vq->last_avail_wrap_counter ^= 1;
1734         }
1735     }
1736 
1737     return dropped;
1738 }
1739 
1740 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1741 {
1742     unsigned int dropped = 0;
1743     VirtQueueElement elem = {};
1744     VirtIODevice *vdev = vq->vdev;
1745     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1746 
1747     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1748         /* works similar to virtqueue_pop but does not map buffers
1749         * and does not allocate any memory */
1750         smp_rmb();
1751         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1752             break;
1753         }
1754         vq->inuse++;
1755         vq->last_avail_idx++;
1756         if (fEventIdx) {
1757             vring_set_avail_event(vq, vq->last_avail_idx);
1758         }
1759         /* immediately push the element, nothing to unmap
1760          * as both in_num and out_num are set to 0 */
1761         virtqueue_push(vq, &elem, 0);
1762         dropped++;
1763     }
1764 
1765     return dropped;
1766 }
1767 
1768 /* virtqueue_drop_all:
1769  * @vq: The #VirtQueue
1770  * Drops all queued buffers and indicates them to the guest
1771  * as if they are done. Useful when buffers can not be
1772  * processed but must be returned to the guest.
1773  */
1774 unsigned int virtqueue_drop_all(VirtQueue *vq)
1775 {
1776     struct VirtIODevice *vdev = vq->vdev;
1777 
1778     if (virtio_device_disabled(vq->vdev)) {
1779         return 0;
1780     }
1781 
1782     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1783         return virtqueue_packed_drop_all(vq);
1784     } else {
1785         return virtqueue_split_drop_all(vq);
1786     }
1787 }
1788 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * This structure is that earlier layout: fixed-size arrays of
 * VIRTQUEUE_MAX_SIZE entries, of which only the first in_num / out_num
 * are meaningful.  It is transferred as a raw block of
 * sizeof(VirtQueueElementOld) bytes.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
1805 
1806 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1807 {
1808     VirtQueueElement *elem;
1809     VirtQueueElementOld data;
1810     int i;
1811 
1812     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1813 
1814     /* TODO: teach all callers that this can fail, and return failure instead
1815      * of asserting here.
1816      * This is just one thing (there are probably more) that must be
1817      * fixed before we can allow NDEBUG compilation.
1818      */
1819     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1820     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1821 
1822     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1823     elem->index = data.index;
1824 
1825     for (i = 0; i < elem->in_num; i++) {
1826         elem->in_addr[i] = data.in_addr[i];
1827     }
1828 
1829     for (i = 0; i < elem->out_num; i++) {
1830         elem->out_addr[i] = data.out_addr[i];
1831     }
1832 
1833     for (i = 0; i < elem->in_num; i++) {
1834         /* Base is overwritten by virtqueue_map.  */
1835         elem->in_sg[i].iov_base = 0;
1836         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1837     }
1838 
1839     for (i = 0; i < elem->out_num; i++) {
1840         /* Base is overwritten by virtqueue_map.  */
1841         elem->out_sg[i].iov_base = 0;
1842         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1843     }
1844 
1845     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1846         qemu_get_be32s(f, &elem->ndescs);
1847     }
1848 
1849     virtqueue_map(vdev, elem);
1850     return elem;
1851 }
1852 
1853 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1854                                 VirtQueueElement *elem)
1855 {
1856     VirtQueueElementOld data;
1857     int i;
1858 
1859     memset(&data, 0, sizeof(data));
1860     data.index = elem->index;
1861     data.in_num = elem->in_num;
1862     data.out_num = elem->out_num;
1863 
1864     for (i = 0; i < elem->in_num; i++) {
1865         data.in_addr[i] = elem->in_addr[i];
1866     }
1867 
1868     for (i = 0; i < elem->out_num; i++) {
1869         data.out_addr[i] = elem->out_addr[i];
1870     }
1871 
1872     for (i = 0; i < elem->in_num; i++) {
1873         /* Base is overwritten by virtqueue_map when loading.  Do not
1874          * save it, as it would leak the QEMU address space layout.  */
1875         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1876     }
1877 
1878     for (i = 0; i < elem->out_num; i++) {
1879         /* Do not save iov_base as above.  */
1880         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1881     }
1882 
1883     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1884         qemu_put_be32s(f, &elem->ndescs);
1885     }
1886 
1887     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1888 }
1889 
1890 /* virtio device */
1891 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1892 {
1893     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1894     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1895 
1896     if (virtio_device_disabled(vdev)) {
1897         return;
1898     }
1899 
1900     if (k->notify) {
1901         k->notify(qbus->parent, vector);
1902     }
1903 }
1904 
1905 void virtio_update_irq(VirtIODevice *vdev)
1906 {
1907     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1908 }
1909 
1910 static int virtio_validate_features(VirtIODevice *vdev)
1911 {
1912     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1913 
1914     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1915         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1916         return -EFAULT;
1917     }
1918 
1919     if (k->validate_features) {
1920         return k->validate_features(vdev);
1921     } else {
1922         return 0;
1923     }
1924 }
1925 
1926 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1927 {
1928     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1929     trace_virtio_set_status(vdev, val);
1930 
1931     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1932         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1933             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1934             int ret = virtio_validate_features(vdev);
1935 
1936             if (ret) {
1937                 return ret;
1938             }
1939         }
1940     }
1941 
1942     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1943         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1944         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1945     }
1946 
1947     if (k->set_status) {
1948         k->set_status(vdev, val);
1949     }
1950     vdev->status = val;
1951 
1952     return 0;
1953 }
1954 
1955 static enum virtio_device_endian virtio_default_endian(void)
1956 {
1957     if (target_words_bigendian()) {
1958         return VIRTIO_DEVICE_ENDIAN_BIG;
1959     } else {
1960         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1961     }
1962 }
1963 
1964 static enum virtio_device_endian virtio_current_cpu_endian(void)
1965 {
1966     if (cpu_virtio_is_big_endian(current_cpu)) {
1967         return VIRTIO_DEVICE_ENDIAN_BIG;
1968     } else {
1969         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970     }
1971 }
1972 
1973 void virtio_reset(void *opaque)
1974 {
1975     VirtIODevice *vdev = opaque;
1976     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1977     int i;
1978 
1979     virtio_set_status(vdev, 0);
1980     if (current_cpu) {
1981         /* Guest initiated reset */
1982         vdev->device_endian = virtio_current_cpu_endian();
1983     } else {
1984         /* System reset */
1985         vdev->device_endian = virtio_default_endian();
1986     }
1987 
1988     if (k->reset) {
1989         k->reset(vdev);
1990     }
1991 
1992     vdev->start_on_kick = false;
1993     vdev->started = false;
1994     vdev->broken = false;
1995     vdev->guest_features = 0;
1996     vdev->queue_sel = 0;
1997     vdev->status = 0;
1998     vdev->disabled = false;
1999     qatomic_set(&vdev->isr, 0);
2000     vdev->config_vector = VIRTIO_NO_VECTOR;
2001     virtio_notify_vector(vdev, vdev->config_vector);
2002 
2003     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2004         vdev->vq[i].vring.desc = 0;
2005         vdev->vq[i].vring.avail = 0;
2006         vdev->vq[i].vring.used = 0;
2007         vdev->vq[i].last_avail_idx = 0;
2008         vdev->vq[i].shadow_avail_idx = 0;
2009         vdev->vq[i].used_idx = 0;
2010         vdev->vq[i].last_avail_wrap_counter = true;
2011         vdev->vq[i].shadow_avail_wrap_counter = true;
2012         vdev->vq[i].used_wrap_counter = true;
2013         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2014         vdev->vq[i].signalled_used = 0;
2015         vdev->vq[i].signalled_used_valid = false;
2016         vdev->vq[i].notification = true;
2017         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2018         vdev->vq[i].inuse = 0;
2019         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2020     }
2021 }
2022 
2023 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2024 {
2025     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2026     uint8_t val;
2027 
2028     if (addr + sizeof(val) > vdev->config_len) {
2029         return (uint32_t)-1;
2030     }
2031 
2032     k->get_config(vdev, vdev->config);
2033 
2034     val = ldub_p(vdev->config + addr);
2035     return val;
2036 }
2037 
2038 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2039 {
2040     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2041     uint16_t val;
2042 
2043     if (addr + sizeof(val) > vdev->config_len) {
2044         return (uint32_t)-1;
2045     }
2046 
2047     k->get_config(vdev, vdev->config);
2048 
2049     val = lduw_p(vdev->config + addr);
2050     return val;
2051 }
2052 
2053 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2054 {
2055     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2056     uint32_t val;
2057 
2058     if (addr + sizeof(val) > vdev->config_len) {
2059         return (uint32_t)-1;
2060     }
2061 
2062     k->get_config(vdev, vdev->config);
2063 
2064     val = ldl_p(vdev->config + addr);
2065     return val;
2066 }
2067 
2068 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2069 {
2070     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2071     uint8_t val = data;
2072 
2073     if (addr + sizeof(val) > vdev->config_len) {
2074         return;
2075     }
2076 
2077     stb_p(vdev->config + addr, val);
2078 
2079     if (k->set_config) {
2080         k->set_config(vdev, vdev->config);
2081     }
2082 }
2083 
2084 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2085 {
2086     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2087     uint16_t val = data;
2088 
2089     if (addr + sizeof(val) > vdev->config_len) {
2090         return;
2091     }
2092 
2093     stw_p(vdev->config + addr, val);
2094 
2095     if (k->set_config) {
2096         k->set_config(vdev, vdev->config);
2097     }
2098 }
2099 
2100 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2101 {
2102     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2103     uint32_t val = data;
2104 
2105     if (addr + sizeof(val) > vdev->config_len) {
2106         return;
2107     }
2108 
2109     stl_p(vdev->config + addr, val);
2110 
2111     if (k->set_config) {
2112         k->set_config(vdev, vdev->config);
2113     }
2114 }
2115 
2116 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2117 {
2118     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2119     uint8_t val;
2120 
2121     if (addr + sizeof(val) > vdev->config_len) {
2122         return (uint32_t)-1;
2123     }
2124 
2125     k->get_config(vdev, vdev->config);
2126 
2127     val = ldub_p(vdev->config + addr);
2128     return val;
2129 }
2130 
2131 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2132 {
2133     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2134     uint16_t val;
2135 
2136     if (addr + sizeof(val) > vdev->config_len) {
2137         return (uint32_t)-1;
2138     }
2139 
2140     k->get_config(vdev, vdev->config);
2141 
2142     val = lduw_le_p(vdev->config + addr);
2143     return val;
2144 }
2145 
2146 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2147 {
2148     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2149     uint32_t val;
2150 
2151     if (addr + sizeof(val) > vdev->config_len) {
2152         return (uint32_t)-1;
2153     }
2154 
2155     k->get_config(vdev, vdev->config);
2156 
2157     val = ldl_le_p(vdev->config + addr);
2158     return val;
2159 }
2160 
2161 void virtio_config_modern_writeb(VirtIODevice *vdev,
2162                                  uint32_t addr, uint32_t data)
2163 {
2164     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2165     uint8_t val = data;
2166 
2167     if (addr + sizeof(val) > vdev->config_len) {
2168         return;
2169     }
2170 
2171     stb_p(vdev->config + addr, val);
2172 
2173     if (k->set_config) {
2174         k->set_config(vdev, vdev->config);
2175     }
2176 }
2177 
2178 void virtio_config_modern_writew(VirtIODevice *vdev,
2179                                  uint32_t addr, uint32_t data)
2180 {
2181     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2182     uint16_t val = data;
2183 
2184     if (addr + sizeof(val) > vdev->config_len) {
2185         return;
2186     }
2187 
2188     stw_le_p(vdev->config + addr, val);
2189 
2190     if (k->set_config) {
2191         k->set_config(vdev, vdev->config);
2192     }
2193 }
2194 
2195 void virtio_config_modern_writel(VirtIODevice *vdev,
2196                                  uint32_t addr, uint32_t data)
2197 {
2198     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2199     uint32_t val = data;
2200 
2201     if (addr + sizeof(val) > vdev->config_len) {
2202         return;
2203     }
2204 
2205     stl_le_p(vdev->config + addr, val);
2206 
2207     if (k->set_config) {
2208         k->set_config(vdev, vdev->config);
2209     }
2210 }
2211 
2212 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2213 {
2214     if (!vdev->vq[n].vring.num) {
2215         return;
2216     }
2217     vdev->vq[n].vring.desc = addr;
2218     virtio_queue_update_rings(vdev, n);
2219 }
2220 
2221 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2222 {
2223     return vdev->vq[n].vring.desc;
2224 }
2225 
2226 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2227                             hwaddr avail, hwaddr used)
2228 {
2229     if (!vdev->vq[n].vring.num) {
2230         return;
2231     }
2232     vdev->vq[n].vring.desc = desc;
2233     vdev->vq[n].vring.avail = avail;
2234     vdev->vq[n].vring.used = used;
2235     virtio_init_region_cache(vdev, n);
2236 }
2237 
2238 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2239 {
2240     /* Don't allow guest to flip queue between existent and
2241      * nonexistent states, or to set it to an invalid size.
2242      */
2243     if (!!num != !!vdev->vq[n].vring.num ||
2244         num > VIRTQUEUE_MAX_SIZE ||
2245         num < 0) {
2246         return;
2247     }
2248     vdev->vq[n].vring.num = num;
2249 }
2250 
2251 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2252 {
2253     return QLIST_FIRST(&vdev->vector_queues[vector]);
2254 }
2255 
2256 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2257 {
2258     return QLIST_NEXT(vq, node);
2259 }
2260 
2261 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2262 {
2263     return vdev->vq[n].vring.num;
2264 }
2265 
2266 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2267 {
2268     return vdev->vq[n].vring.num_default;
2269 }
2270 
2271 int virtio_get_num_queues(VirtIODevice *vdev)
2272 {
2273     int i;
2274 
2275     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2276         if (!virtio_queue_get_num(vdev, i)) {
2277             break;
2278         }
2279     }
2280 
2281     return i;
2282 }
2283 
2284 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2285 {
2286     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2287     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2288 
2289     /* virtio-1 compliant devices cannot change the alignment */
2290     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2291         error_report("tried to modify queue alignment for virtio-1 device");
2292         return;
2293     }
2294     /* Check that the transport told us it was going to do this
2295      * (so a buggy transport will immediately assert rather than
2296      * silently failing to migrate this state)
2297      */
2298     assert(k->has_variable_vring_alignment);
2299 
2300     if (align) {
2301         vdev->vq[n].vring.align = align;
2302         virtio_queue_update_rings(vdev, n);
2303     }
2304 }
2305 
2306 static void virtio_queue_notify_aio_vq(VirtQueue *vq)
2307 {
2308     if (vq->vring.desc && vq->handle_aio_output) {
2309         VirtIODevice *vdev = vq->vdev;
2310 
2311         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2312         vq->handle_aio_output(vdev, vq);
2313 
2314         if (unlikely(vdev->start_on_kick)) {
2315             virtio_set_started(vdev, true);
2316         }
2317     }
2318 }
2319 
2320 static void virtio_queue_notify_vq(VirtQueue *vq)
2321 {
2322     if (vq->vring.desc && vq->handle_output) {
2323         VirtIODevice *vdev = vq->vdev;
2324 
2325         if (unlikely(vdev->broken)) {
2326             return;
2327         }
2328 
2329         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2330         vq->handle_output(vdev, vq);
2331 
2332         if (unlikely(vdev->start_on_kick)) {
2333             virtio_set_started(vdev, true);
2334         }
2335     }
2336 }
2337 
2338 void virtio_queue_notify(VirtIODevice *vdev, int n)
2339 {
2340     VirtQueue *vq = &vdev->vq[n];
2341 
2342     if (unlikely(!vq->vring.desc || vdev->broken)) {
2343         return;
2344     }
2345 
2346     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2347     if (vq->host_notifier_enabled) {
2348         event_notifier_set(&vq->host_notifier);
2349     } else if (vq->handle_output) {
2350         vq->handle_output(vdev, vq);
2351 
2352         if (unlikely(vdev->start_on_kick)) {
2353             virtio_set_started(vdev, true);
2354         }
2355     }
2356 }
2357 
2358 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2359 {
2360     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2361         VIRTIO_NO_VECTOR;
2362 }
2363 
2364 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2365 {
2366     VirtQueue *vq = &vdev->vq[n];
2367 
2368     if (n < VIRTIO_QUEUE_MAX) {
2369         if (vdev->vector_queues &&
2370             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2371             QLIST_REMOVE(vq, node);
2372         }
2373         vdev->vq[n].vector = vector;
2374         if (vdev->vector_queues &&
2375             vector != VIRTIO_NO_VECTOR) {
2376             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2377         }
2378     }
2379 }
2380 
2381 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2382                             VirtIOHandleOutput handle_output)
2383 {
2384     int i;
2385 
2386     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2387         if (vdev->vq[i].vring.num == 0)
2388             break;
2389     }
2390 
2391     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2392         abort();
2393 
2394     vdev->vq[i].vring.num = queue_size;
2395     vdev->vq[i].vring.num_default = queue_size;
2396     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2397     vdev->vq[i].handle_output = handle_output;
2398     vdev->vq[i].handle_aio_output = NULL;
2399     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2400                                        queue_size);
2401 
2402     return &vdev->vq[i];
2403 }
2404 
2405 void virtio_delete_queue(VirtQueue *vq)
2406 {
2407     vq->vring.num = 0;
2408     vq->vring.num_default = 0;
2409     vq->handle_output = NULL;
2410     vq->handle_aio_output = NULL;
2411     g_free(vq->used_elems);
2412     vq->used_elems = NULL;
2413     virtio_virtqueue_reset_region_cache(vq);
2414 }
2415 
2416 void virtio_del_queue(VirtIODevice *vdev, int n)
2417 {
2418     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2419         abort();
2420     }
2421 
2422     virtio_delete_queue(&vdev->vq[n]);
2423 }
2424 
2425 static void virtio_set_isr(VirtIODevice *vdev, int value)
2426 {
2427     uint8_t old = qatomic_read(&vdev->isr);
2428 
2429     /* Do not write ISR if it does not change, so that its cacheline remains
2430      * shared in the common case where the guest does not read it.
2431      */
2432     if ((old & value) != value) {
2433         qatomic_or(&vdev->isr, value);
2434     }
2435 }
2436 
2437 /* Called within rcu_read_lock(). */
2438 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2439 {
2440     uint16_t old, new;
2441     bool v;
2442     /* We need to expose used array entries before checking used event. */
2443     smp_mb();
2444     /* Always notify when queue is empty (when feature acknowledge) */
2445     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2446         !vq->inuse && virtio_queue_empty(vq)) {
2447         return true;
2448     }
2449 
2450     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2451         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2452     }
2453 
2454     v = vq->signalled_used_valid;
2455     vq->signalled_used_valid = true;
2456     old = vq->signalled_used;
2457     new = vq->signalled_used = vq->used_idx;
2458     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2459 }
2460 
2461 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2462                                     uint16_t off_wrap, uint16_t new,
2463                                     uint16_t old)
2464 {
2465     int off = off_wrap & ~(1 << 15);
2466 
2467     if (wrap != off_wrap >> 15) {
2468         off -= vq->vring.num;
2469     }
2470 
2471     return vring_need_event(off, new, old);
2472 }
2473 
2474 /* Called within rcu_read_lock(). */
2475 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2476 {
2477     VRingPackedDescEvent e;
2478     uint16_t old, new;
2479     bool v;
2480     VRingMemoryRegionCaches *caches;
2481 
2482     caches = vring_get_region_caches(vq);
2483     if (!caches) {
2484         return false;
2485     }
2486 
2487     vring_packed_event_read(vdev, &caches->avail, &e);
2488 
2489     old = vq->signalled_used;
2490     new = vq->signalled_used = vq->used_idx;
2491     v = vq->signalled_used_valid;
2492     vq->signalled_used_valid = true;
2493 
2494     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2495         return false;
2496     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2497         return true;
2498     }
2499 
2500     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2501                                          e.off_wrap, new, old);
2502 }
2503 
2504 /* Called within rcu_read_lock().  */
2505 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2506 {
2507     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2508         return virtio_packed_should_notify(vdev, vq);
2509     } else {
2510         return virtio_split_should_notify(vdev, vq);
2511     }
2512 }
2513 
2514 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2515 {
2516     WITH_RCU_READ_LOCK_GUARD() {
2517         if (!virtio_should_notify(vdev, vq)) {
2518             return;
2519         }
2520     }
2521 
2522     trace_virtio_notify_irqfd(vdev, vq);
2523 
2524     /*
2525      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2526      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2527      * incorrectly polling this bit during crashdump and hibernation
2528      * in MSI mode, causing a hang if this bit is never updated.
2529      * Recent releases of Windows do not really shut down, but rather
2530      * log out and hibernate to make the next startup faster.  Hence,
2531      * this manifested as a more serious hang during shutdown with
2532      *
2533      * Next driver release from 2016 fixed this problem, so working around it
2534      * is not a must, but it's easy to do so let's do it here.
2535      *
2536      * Note: it's safe to update ISR from any thread as it was switched
2537      * to an atomic operation.
2538      */
2539     virtio_set_isr(vq->vdev, 0x1);
2540     event_notifier_set(&vq->guest_notifier);
2541 }
2542 
2543 static void virtio_irq(VirtQueue *vq)
2544 {
2545     virtio_set_isr(vq->vdev, 0x1);
2546     virtio_notify_vector(vq->vdev, vq->vector);
2547 }
2548 
2549 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2550 {
2551     WITH_RCU_READ_LOCK_GUARD() {
2552         if (!virtio_should_notify(vdev, vq)) {
2553             return;
2554         }
2555     }
2556 
2557     trace_virtio_notify(vdev, vq);
2558     virtio_irq(vq);
2559 }
2560 
2561 void virtio_notify_config(VirtIODevice *vdev)
2562 {
2563     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2564         return;
2565 
2566     virtio_set_isr(vdev, 0x3);
2567     vdev->generation++;
2568     virtio_notify_vector(vdev, vdev->config_vector);
2569 }
2570 
2571 static bool virtio_device_endian_needed(void *opaque)
2572 {
2573     VirtIODevice *vdev = opaque;
2574 
2575     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2576     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2577         return vdev->device_endian != virtio_default_endian();
2578     }
2579     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2580     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2581 }
2582 
2583 static bool virtio_64bit_features_needed(void *opaque)
2584 {
2585     VirtIODevice *vdev = opaque;
2586 
2587     return (vdev->host_features >> 32) != 0;
2588 }
2589 
2590 static bool virtio_virtqueue_needed(void *opaque)
2591 {
2592     VirtIODevice *vdev = opaque;
2593 
2594     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2595 }
2596 
2597 static bool virtio_packed_virtqueue_needed(void *opaque)
2598 {
2599     VirtIODevice *vdev = opaque;
2600 
2601     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2602 }
2603 
2604 static bool virtio_ringsize_needed(void *opaque)
2605 {
2606     VirtIODevice *vdev = opaque;
2607     int i;
2608 
2609     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2610         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2611             return true;
2612         }
2613     }
2614     return false;
2615 }
2616 
2617 static bool virtio_extra_state_needed(void *opaque)
2618 {
2619     VirtIODevice *vdev = opaque;
2620     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2621     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2622 
2623     return k->has_extra_state &&
2624         k->has_extra_state(qbus->parent);
2625 }
2626 
2627 static bool virtio_broken_needed(void *opaque)
2628 {
2629     VirtIODevice *vdev = opaque;
2630 
2631     return vdev->broken;
2632 }
2633 
2634 static bool virtio_started_needed(void *opaque)
2635 {
2636     VirtIODevice *vdev = opaque;
2637 
2638     return vdev->started;
2639 }
2640 
2641 static bool virtio_disabled_needed(void *opaque)
2642 {
2643     VirtIODevice *vdev = opaque;
2644 
2645     return vdev->disabled;
2646 }
2647 
2648 static const VMStateDescription vmstate_virtqueue = {
2649     .name = "virtqueue_state",
2650     .version_id = 1,
2651     .minimum_version_id = 1,
2652     .fields = (VMStateField[]) {
2653         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2654         VMSTATE_UINT64(vring.used, struct VirtQueue),
2655         VMSTATE_END_OF_LIST()
2656     }
2657 };
2658 
2659 static const VMStateDescription vmstate_packed_virtqueue = {
2660     .name = "packed_virtqueue_state",
2661     .version_id = 1,
2662     .minimum_version_id = 1,
2663     .fields = (VMStateField[]) {
2664         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2665         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2666         VMSTATE_UINT16(used_idx, struct VirtQueue),
2667         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2668         VMSTATE_UINT32(inuse, struct VirtQueue),
2669         VMSTATE_END_OF_LIST()
2670     }
2671 };
2672 
2673 static const VMStateDescription vmstate_virtio_virtqueues = {
2674     .name = "virtio/virtqueues",
2675     .version_id = 1,
2676     .minimum_version_id = 1,
2677     .needed = &virtio_virtqueue_needed,
2678     .fields = (VMStateField[]) {
2679         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2680                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2681         VMSTATE_END_OF_LIST()
2682     }
2683 };
2684 
2685 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2686     .name = "virtio/packed_virtqueues",
2687     .version_id = 1,
2688     .minimum_version_id = 1,
2689     .needed = &virtio_packed_virtqueue_needed,
2690     .fields = (VMStateField[]) {
2691         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2692                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2693         VMSTATE_END_OF_LIST()
2694     }
2695 };
2696 
2697 static const VMStateDescription vmstate_ringsize = {
2698     .name = "ringsize_state",
2699     .version_id = 1,
2700     .minimum_version_id = 1,
2701     .fields = (VMStateField[]) {
2702         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2703         VMSTATE_END_OF_LIST()
2704     }
2705 };
2706 
2707 static const VMStateDescription vmstate_virtio_ringsize = {
2708     .name = "virtio/ringsize",
2709     .version_id = 1,
2710     .minimum_version_id = 1,
2711     .needed = &virtio_ringsize_needed,
2712     .fields = (VMStateField[]) {
2713         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2714                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2715         VMSTATE_END_OF_LIST()
2716     }
2717 };
2718 
2719 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2720                            const VMStateField *field)
2721 {
2722     VirtIODevice *vdev = pv;
2723     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2724     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2725 
2726     if (!k->load_extra_state) {
2727         return -1;
2728     } else {
2729         return k->load_extra_state(qbus->parent, f);
2730     }
2731 }
2732 
2733 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2734                            const VMStateField *field, JSONWriter *vmdesc)
2735 {
2736     VirtIODevice *vdev = pv;
2737     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2738     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2739 
2740     k->save_extra_state(qbus->parent, f);
2741     return 0;
2742 }
2743 
2744 static const VMStateInfo vmstate_info_extra_state = {
2745     .name = "virtqueue_extra_state",
2746     .get = get_extra_state,
2747     .put = put_extra_state,
2748 };
2749 
2750 static const VMStateDescription vmstate_virtio_extra_state = {
2751     .name = "virtio/extra_state",
2752     .version_id = 1,
2753     .minimum_version_id = 1,
2754     .needed = &virtio_extra_state_needed,
2755     .fields = (VMStateField[]) {
2756         {
2757             .name         = "extra_state",
2758             .version_id   = 0,
2759             .field_exists = NULL,
2760             .size         = 0,
2761             .info         = &vmstate_info_extra_state,
2762             .flags        = VMS_SINGLE,
2763             .offset       = 0,
2764         },
2765         VMSTATE_END_OF_LIST()
2766     }
2767 };
2768 
2769 static const VMStateDescription vmstate_virtio_device_endian = {
2770     .name = "virtio/device_endian",
2771     .version_id = 1,
2772     .minimum_version_id = 1,
2773     .needed = &virtio_device_endian_needed,
2774     .fields = (VMStateField[]) {
2775         VMSTATE_UINT8(device_endian, VirtIODevice),
2776         VMSTATE_END_OF_LIST()
2777     }
2778 };
2779 
2780 static const VMStateDescription vmstate_virtio_64bit_features = {
2781     .name = "virtio/64bit_features",
2782     .version_id = 1,
2783     .minimum_version_id = 1,
2784     .needed = &virtio_64bit_features_needed,
2785     .fields = (VMStateField[]) {
2786         VMSTATE_UINT64(guest_features, VirtIODevice),
2787         VMSTATE_END_OF_LIST()
2788     }
2789 };
2790 
2791 static const VMStateDescription vmstate_virtio_broken = {
2792     .name = "virtio/broken",
2793     .version_id = 1,
2794     .minimum_version_id = 1,
2795     .needed = &virtio_broken_needed,
2796     .fields = (VMStateField[]) {
2797         VMSTATE_BOOL(broken, VirtIODevice),
2798         VMSTATE_END_OF_LIST()
2799     }
2800 };
2801 
2802 static const VMStateDescription vmstate_virtio_started = {
2803     .name = "virtio/started",
2804     .version_id = 1,
2805     .minimum_version_id = 1,
2806     .needed = &virtio_started_needed,
2807     .fields = (VMStateField[]) {
2808         VMSTATE_BOOL(started, VirtIODevice),
2809         VMSTATE_END_OF_LIST()
2810     }
2811 };
2812 
2813 static const VMStateDescription vmstate_virtio_disabled = {
2814     .name = "virtio/disabled",
2815     .version_id = 1,
2816     .minimum_version_id = 1,
2817     .needed = &virtio_disabled_needed,
2818     .fields = (VMStateField[]) {
2819         VMSTATE_BOOL(disabled, VirtIODevice),
2820         VMSTATE_END_OF_LIST()
2821     }
2822 };
2823 
2824 static const VMStateDescription vmstate_virtio = {
2825     .name = "virtio",
2826     .version_id = 1,
2827     .minimum_version_id = 1,
2828     .minimum_version_id_old = 1,
2829     .fields = (VMStateField[]) {
2830         VMSTATE_END_OF_LIST()
2831     },
2832     .subsections = (const VMStateDescription*[]) {
2833         &vmstate_virtio_device_endian,
2834         &vmstate_virtio_64bit_features,
2835         &vmstate_virtio_virtqueues,
2836         &vmstate_virtio_ringsize,
2837         &vmstate_virtio_broken,
2838         &vmstate_virtio_extra_state,
2839         &vmstate_virtio_started,
2840         &vmstate_virtio_packed_virtqueues,
2841         &vmstate_virtio_disabled,
2842         NULL
2843     }
2844 };
2845 
2846 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2847 {
2848     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2849     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2850     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2851     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2852     int i;
2853 
2854     if (k->save_config) {
2855         k->save_config(qbus->parent, f);
2856     }
2857 
2858     qemu_put_8s(f, &vdev->status);
2859     qemu_put_8s(f, &vdev->isr);
2860     qemu_put_be16s(f, &vdev->queue_sel);
2861     qemu_put_be32s(f, &guest_features_lo);
2862     qemu_put_be32(f, vdev->config_len);
2863     qemu_put_buffer(f, vdev->config, vdev->config_len);
2864 
2865     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2866         if (vdev->vq[i].vring.num == 0)
2867             break;
2868     }
2869 
2870     qemu_put_be32(f, i);
2871 
2872     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2873         if (vdev->vq[i].vring.num == 0)
2874             break;
2875 
2876         qemu_put_be32(f, vdev->vq[i].vring.num);
2877         if (k->has_variable_vring_alignment) {
2878             qemu_put_be32(f, vdev->vq[i].vring.align);
2879         }
2880         /*
2881          * Save desc now, the rest of the ring addresses are saved in
2882          * subsections for VIRTIO-1 devices.
2883          */
2884         qemu_put_be64(f, vdev->vq[i].vring.desc);
2885         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2886         if (k->save_queue) {
2887             k->save_queue(qbus->parent, i, f);
2888         }
2889     }
2890 
2891     if (vdc->save != NULL) {
2892         vdc->save(vdev, f);
2893     }
2894 
2895     if (vdc->vmsd) {
2896         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2897         if (ret) {
2898             return ret;
2899         }
2900     }
2901 
2902     /* Subsections */
2903     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2904 }
2905 
2906 /* A wrapper for use as a VMState .put function */
2907 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2908                               const VMStateField *field, JSONWriter *vmdesc)
2909 {
2910     return virtio_save(VIRTIO_DEVICE(opaque), f);
2911 }
2912 
2913 /* A wrapper for use as a VMState .get function */
2914 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2915                              const VMStateField *field)
2916 {
2917     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2918     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2919 
2920     return virtio_load(vdev, f, dc->vmsd->version_id);
2921 }
2922 
2923 const VMStateInfo  virtio_vmstate_info = {
2924     .name = "virtio",
2925     .get = virtio_device_get,
2926     .put = virtio_device_put,
2927 };
2928 
2929 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2930 {
2931     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2932     bool bad = (val & ~(vdev->host_features)) != 0;
2933 
2934     val &= vdev->host_features;
2935     if (k->set_features) {
2936         k->set_features(vdev, val);
2937     }
2938     vdev->guest_features = val;
2939     return bad ? -1 : 0;
2940 }
2941 
2942 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2943 {
2944     int ret;
2945     /*
2946      * The driver must not attempt to set features after feature negotiation
2947      * has finished.
2948      */
2949     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2950         return -EINVAL;
2951     }
2952     ret = virtio_set_features_nocheck(vdev, val);
2953     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2954         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2955         int i;
2956         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2957             if (vdev->vq[i].vring.num != 0) {
2958                 virtio_init_region_cache(vdev, i);
2959             }
2960         }
2961     }
2962     if (!ret) {
2963         if (!virtio_device_started(vdev, vdev->status) &&
2964             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2965             vdev->start_on_kick = true;
2966         }
2967     }
2968     return ret;
2969 }
2970 
2971 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2972                                       uint64_t host_features)
2973 {
2974     size_t config_size = 0;
2975     int i;
2976 
2977     for (i = 0; feature_sizes[i].flags != 0; i++) {
2978         if (host_features & feature_sizes[i].flags) {
2979             config_size = MAX(feature_sizes[i].end, config_size);
2980         }
2981     }
2982 
2983     return config_size;
2984 }
2985 
2986 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2987 {
2988     int i, ret;
2989     int32_t config_len;
2990     uint32_t num;
2991     uint32_t features;
2992     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2993     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2994     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2995 
2996     /*
2997      * We poison the endianness to ensure it does not get used before
2998      * subsections have been loaded.
2999      */
3000     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3001 
3002     if (k->load_config) {
3003         ret = k->load_config(qbus->parent, f);
3004         if (ret)
3005             return ret;
3006     }
3007 
3008     qemu_get_8s(f, &vdev->status);
3009     qemu_get_8s(f, &vdev->isr);
3010     qemu_get_be16s(f, &vdev->queue_sel);
3011     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3012         return -1;
3013     }
3014     qemu_get_be32s(f, &features);
3015 
3016     /*
3017      * Temporarily set guest_features low bits - needed by
3018      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3019      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3020      *
3021      * Note: devices should always test host features in future - don't create
3022      * new dependencies like this.
3023      */
3024     vdev->guest_features = features;
3025 
3026     config_len = qemu_get_be32(f);
3027 
3028     /*
3029      * There are cases where the incoming config can be bigger or smaller
3030      * than what we have; so load what we have space for, and skip
3031      * any excess that's in the stream.
3032      */
3033     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3034 
3035     while (config_len > vdev->config_len) {
3036         qemu_get_byte(f);
3037         config_len--;
3038     }
3039 
3040     num = qemu_get_be32(f);
3041 
3042     if (num > VIRTIO_QUEUE_MAX) {
3043         error_report("Invalid number of virtqueues: 0x%x", num);
3044         return -1;
3045     }
3046 
3047     for (i = 0; i < num; i++) {
3048         vdev->vq[i].vring.num = qemu_get_be32(f);
3049         if (k->has_variable_vring_alignment) {
3050             vdev->vq[i].vring.align = qemu_get_be32(f);
3051         }
3052         vdev->vq[i].vring.desc = qemu_get_be64(f);
3053         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3054         vdev->vq[i].signalled_used_valid = false;
3055         vdev->vq[i].notification = true;
3056 
3057         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3058             error_report("VQ %d address 0x0 "
3059                          "inconsistent with Host index 0x%x",
3060                          i, vdev->vq[i].last_avail_idx);
3061             return -1;
3062         }
3063         if (k->load_queue) {
3064             ret = k->load_queue(qbus->parent, i, f);
3065             if (ret)
3066                 return ret;
3067         }
3068     }
3069 
3070     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3071 
3072     if (vdc->load != NULL) {
3073         ret = vdc->load(vdev, f, version_id);
3074         if (ret) {
3075             return ret;
3076         }
3077     }
3078 
3079     if (vdc->vmsd) {
3080         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3081         if (ret) {
3082             return ret;
3083         }
3084     }
3085 
3086     /* Subsections */
3087     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3088     if (ret) {
3089         return ret;
3090     }
3091 
3092     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3093         vdev->device_endian = virtio_default_endian();
3094     }
3095 
3096     if (virtio_64bit_features_needed(vdev)) {
3097         /*
3098          * Subsection load filled vdev->guest_features.  Run them
3099          * through virtio_set_features to sanity-check them against
3100          * host_features.
3101          */
3102         uint64_t features64 = vdev->guest_features;
3103         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3104             error_report("Features 0x%" PRIx64 " unsupported. "
3105                          "Allowed features: 0x%" PRIx64,
3106                          features64, vdev->host_features);
3107             return -1;
3108         }
3109     } else {
3110         if (virtio_set_features_nocheck(vdev, features) < 0) {
3111             error_report("Features 0x%x unsupported. "
3112                          "Allowed features: 0x%" PRIx64,
3113                          features, vdev->host_features);
3114             return -1;
3115         }
3116     }
3117 
3118     if (!virtio_device_started(vdev, vdev->status) &&
3119         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3120         vdev->start_on_kick = true;
3121     }
3122 
3123     RCU_READ_LOCK_GUARD();
3124     for (i = 0; i < num; i++) {
3125         if (vdev->vq[i].vring.desc) {
3126             uint16_t nheads;
3127 
3128             /*
3129              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3130              * only the region cache needs to be set up.  Legacy devices need
3131              * to calculate used and avail ring addresses based on the desc
3132              * address.
3133              */
3134             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3135                 virtio_init_region_cache(vdev, i);
3136             } else {
3137                 virtio_queue_update_rings(vdev, i);
3138             }
3139 
3140             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3141                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3142                 vdev->vq[i].shadow_avail_wrap_counter =
3143                                         vdev->vq[i].last_avail_wrap_counter;
3144                 continue;
3145             }
3146 
3147             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3148             /* Check it isn't doing strange things with descriptor numbers. */
3149             if (nheads > vdev->vq[i].vring.num) {
3150                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3151                              "inconsistent with Host index 0x%x: delta 0x%x",
3152                              i, vdev->vq[i].vring.num,
3153                              vring_avail_idx(&vdev->vq[i]),
3154                              vdev->vq[i].last_avail_idx, nheads);
3155                 vdev->vq[i].used_idx = 0;
3156                 vdev->vq[i].shadow_avail_idx = 0;
3157                 vdev->vq[i].inuse = 0;
3158                 continue;
3159             }
3160             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3161             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3162 
3163             /*
3164              * Some devices migrate VirtQueueElements that have been popped
3165              * from the avail ring but not yet returned to the used ring.
3166              * Since max ring size < UINT16_MAX it's safe to use modulo
3167              * UINT16_MAX + 1 subtraction.
3168              */
3169             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3170                                 vdev->vq[i].used_idx);
3171             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3172                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3173                              "used_idx 0x%x",
3174                              i, vdev->vq[i].vring.num,
3175                              vdev->vq[i].last_avail_idx,
3176                              vdev->vq[i].used_idx);
3177                 return -1;
3178             }
3179         }
3180     }
3181 
3182     if (vdc->post_load) {
3183         ret = vdc->post_load(vdev);
3184         if (ret) {
3185             return ret;
3186         }
3187     }
3188 
3189     return 0;
3190 }
3191 
3192 void virtio_cleanup(VirtIODevice *vdev)
3193 {
3194     qemu_del_vm_change_state_handler(vdev->vmstate);
3195 }
3196 
3197 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3198 {
3199     VirtIODevice *vdev = opaque;
3200     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3201     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3202     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3203     vdev->vm_running = running;
3204 
3205     if (backend_run) {
3206         virtio_set_status(vdev, vdev->status);
3207     }
3208 
3209     if (k->vmstate_change) {
3210         k->vmstate_change(qbus->parent, backend_run);
3211     }
3212 
3213     if (!backend_run) {
3214         virtio_set_status(vdev, vdev->status);
3215     }
3216 }
3217 
3218 void virtio_instance_init_common(Object *proxy_obj, void *data,
3219                                  size_t vdev_size, const char *vdev_name)
3220 {
3221     DeviceState *vdev = data;
3222 
3223     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3224                                        vdev_size, vdev_name, &error_abort,
3225                                        NULL);
3226     qdev_alias_all_properties(vdev, proxy_obj);
3227 }
3228 
3229 void virtio_init(VirtIODevice *vdev, const char *name,
3230                  uint16_t device_id, size_t config_size)
3231 {
3232     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3233     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3234     int i;
3235     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3236 
3237     if (nvectors) {
3238         vdev->vector_queues =
3239             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3240     }
3241 
3242     vdev->start_on_kick = false;
3243     vdev->started = false;
3244     vdev->device_id = device_id;
3245     vdev->status = 0;
3246     qatomic_set(&vdev->isr, 0);
3247     vdev->queue_sel = 0;
3248     vdev->config_vector = VIRTIO_NO_VECTOR;
3249     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3250     vdev->vm_running = runstate_is_running();
3251     vdev->broken = false;
3252     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3253         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3254         vdev->vq[i].vdev = vdev;
3255         vdev->vq[i].queue_index = i;
3256         vdev->vq[i].host_notifier_enabled = false;
3257     }
3258 
3259     vdev->name = name;
3260     vdev->config_len = config_size;
3261     if (vdev->config_len) {
3262         vdev->config = g_malloc0(config_size);
3263     } else {
3264         vdev->config = NULL;
3265     }
3266     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3267             virtio_vmstate_change, vdev);
3268     vdev->device_endian = virtio_default_endian();
3269     vdev->use_guest_notifier_mask = true;
3270 }
3271 
3272 /*
3273  * Only devices that have already been around prior to defining the virtio
3274  * standard support legacy mode; this includes devices not specified in the
3275  * standard. All newer devices conform to the virtio standard only.
3276  */
3277 bool virtio_legacy_allowed(VirtIODevice *vdev)
3278 {
3279     switch (vdev->device_id) {
3280     case VIRTIO_ID_NET:
3281     case VIRTIO_ID_BLOCK:
3282     case VIRTIO_ID_CONSOLE:
3283     case VIRTIO_ID_RNG:
3284     case VIRTIO_ID_BALLOON:
3285     case VIRTIO_ID_RPMSG:
3286     case VIRTIO_ID_SCSI:
3287     case VIRTIO_ID_9P:
3288     case VIRTIO_ID_RPROC_SERIAL:
3289     case VIRTIO_ID_CAIF:
3290         return true;
3291     default:
3292         return false;
3293     }
3294 }
3295 
3296 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3297 {
3298     return vdev->disable_legacy_check;
3299 }
3300 
3301 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3302 {
3303     return vdev->vq[n].vring.desc;
3304 }
3305 
3306 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3307 {
3308     return virtio_queue_get_desc_addr(vdev, n) != 0;
3309 }
3310 
3311 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3312 {
3313     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3314     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3315 
3316     if (k->queue_enabled) {
3317         return k->queue_enabled(qbus->parent, n);
3318     }
3319     return virtio_queue_enabled_legacy(vdev, n);
3320 }
3321 
3322 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3323 {
3324     return vdev->vq[n].vring.avail;
3325 }
3326 
3327 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3328 {
3329     return vdev->vq[n].vring.used;
3330 }
3331 
3332 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3333 {
3334     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3335 }
3336 
3337 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3338 {
3339     int s;
3340 
3341     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3342         return sizeof(struct VRingPackedDescEvent);
3343     }
3344 
3345     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3346     return offsetof(VRingAvail, ring) +
3347         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3348 }
3349 
3350 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3351 {
3352     int s;
3353 
3354     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3355         return sizeof(struct VRingPackedDescEvent);
3356     }
3357 
3358     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3359     return offsetof(VRingUsed, ring) +
3360         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3361 }
3362 
3363 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3364                                                            int n)
3365 {
3366     unsigned int avail, used;
3367 
3368     avail = vdev->vq[n].last_avail_idx;
3369     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3370 
3371     used = vdev->vq[n].used_idx;
3372     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3373 
3374     return avail | used << 16;
3375 }
3376 
3377 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3378                                                       int n)
3379 {
3380     return vdev->vq[n].last_avail_idx;
3381 }
3382 
3383 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3384 {
3385     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3386         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3387     } else {
3388         return virtio_queue_split_get_last_avail_idx(vdev, n);
3389     }
3390 }
3391 
3392 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3393                                                    int n, unsigned int idx)
3394 {
3395     struct VirtQueue *vq = &vdev->vq[n];
3396 
3397     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3398     vq->last_avail_wrap_counter =
3399         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3400     idx >>= 16;
3401     vq->used_idx = idx & 0x7ffff;
3402     vq->used_wrap_counter = !!(idx & 0x8000);
3403 }
3404 
3405 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3406                                                   int n, unsigned int idx)
3407 {
3408         vdev->vq[n].last_avail_idx = idx;
3409         vdev->vq[n].shadow_avail_idx = idx;
3410 }
3411 
3412 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3413                                      unsigned int idx)
3414 {
3415     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3416         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3417     } else {
3418         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3419     }
3420 }
3421 
3422 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3423                                                        int n)
3424 {
3425     /* We don't have a reference like avail idx in shared memory */
3426     return;
3427 }
3428 
3429 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3430                                                       int n)
3431 {
3432     RCU_READ_LOCK_GUARD();
3433     if (vdev->vq[n].vring.desc) {
3434         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3435         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3436     }
3437 }
3438 
3439 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3440 {
3441     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3442         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3443     } else {
3444         virtio_queue_split_restore_last_avail_idx(vdev, n);
3445     }
3446 }
3447 
3448 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3449 {
3450     /* used idx was updated through set_last_avail_idx() */
3451     return;
3452 }
3453 
3454 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3455 {
3456     RCU_READ_LOCK_GUARD();
3457     if (vdev->vq[n].vring.desc) {
3458         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3459     }
3460 }
3461 
3462 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3463 {
3464     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3465         return virtio_queue_packed_update_used_idx(vdev, n);
3466     } else {
3467         return virtio_split_packed_update_used_idx(vdev, n);
3468     }
3469 }
3470 
3471 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3472 {
3473     vdev->vq[n].signalled_used_valid = false;
3474 }
3475 
3476 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3477 {
3478     return vdev->vq + n;
3479 }
3480 
3481 uint16_t virtio_get_queue_index(VirtQueue *vq)
3482 {
3483     return vq->queue_index;
3484 }
3485 
3486 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3487 {
3488     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3489     if (event_notifier_test_and_clear(n)) {
3490         virtio_irq(vq);
3491     }
3492 }
3493 
3494 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3495                                                 bool with_irqfd)
3496 {
3497     if (assign && !with_irqfd) {
3498         event_notifier_set_handler(&vq->guest_notifier,
3499                                    virtio_queue_guest_notifier_read);
3500     } else {
3501         event_notifier_set_handler(&vq->guest_notifier, NULL);
3502     }
3503     if (!assign) {
3504         /* Test and clear notifier before closing it,
3505          * in case poll callback didn't have time to run. */
3506         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3507     }
3508 }
3509 
3510 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3511 {
3512     return &vq->guest_notifier;
3513 }
3514 
3515 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3516 {
3517     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3518     if (event_notifier_test_and_clear(n)) {
3519         virtio_queue_notify_aio_vq(vq);
3520     }
3521 }
3522 
3523 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3524 {
3525     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3526 
3527     virtio_queue_set_notification(vq, 0);
3528 }
3529 
3530 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3531 {
3532     EventNotifier *n = opaque;
3533     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3534 
3535     return vq->vring.desc && !virtio_queue_empty(vq);
3536 }
3537 
3538 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3539 {
3540     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3541 
3542     virtio_queue_notify_aio_vq(vq);
3543 }
3544 
3545 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3546 {
3547     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3548 
3549     /* Caller polls once more after this to catch requests that race with us */
3550     virtio_queue_set_notification(vq, 1);
3551 }
3552 
3553 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3554         VirtIOHandleOutput handle_output)
3555 {
3556     if (handle_output) {
3557         vq->handle_aio_output = handle_output;
3558         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3559                                virtio_queue_host_notifier_aio_read,
3560                                virtio_queue_host_notifier_aio_poll,
3561                                virtio_queue_host_notifier_aio_poll_ready);
3562         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3563                                     virtio_queue_host_notifier_aio_poll_begin,
3564                                     virtio_queue_host_notifier_aio_poll_end);
3565     } else {
3566         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3567         /* Test and clear notifier before after disabling event,
3568          * in case poll callback didn't have time to run. */
3569         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3570         vq->handle_aio_output = NULL;
3571     }
3572 }
3573 
3574 void virtio_queue_host_notifier_read(EventNotifier *n)
3575 {
3576     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3577     if (event_notifier_test_and_clear(n)) {
3578         virtio_queue_notify_vq(vq);
3579     }
3580 }
3581 
3582 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3583 {
3584     return &vq->host_notifier;
3585 }
3586 
3587 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3588 {
3589     vq->host_notifier_enabled = enabled;
3590 }
3591 
3592 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3593                                       MemoryRegion *mr, bool assign)
3594 {
3595     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3596     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3597 
3598     if (k->set_host_notifier_mr) {
3599         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3600     }
3601 
3602     return -1;
3603 }
3604 
3605 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3606 {
3607     g_free(vdev->bus_name);
3608     vdev->bus_name = g_strdup(bus_name);
3609 }
3610 
3611 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3612 {
3613     va_list ap;
3614 
3615     va_start(ap, fmt);
3616     error_vreport(fmt, ap);
3617     va_end(ap);
3618 
3619     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3620         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3621         virtio_notify_config(vdev);
3622     }
3623 
3624     vdev->broken = true;
3625 }
3626 
3627 static void virtio_memory_listener_commit(MemoryListener *listener)
3628 {
3629     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3630     int i;
3631 
3632     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3633         if (vdev->vq[i].vring.num == 0) {
3634             break;
3635         }
3636         virtio_init_region_cache(vdev, i);
3637     }
3638 }
3639 
3640 static void virtio_device_realize(DeviceState *dev, Error **errp)
3641 {
3642     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3643     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3644     Error *err = NULL;
3645 
3646     /* Devices should either use vmsd or the load/save methods */
3647     assert(!vdc->vmsd || !vdc->load);
3648 
3649     if (vdc->realize != NULL) {
3650         vdc->realize(dev, &err);
3651         if (err != NULL) {
3652             error_propagate(errp, err);
3653             return;
3654         }
3655     }
3656 
3657     virtio_bus_device_plugged(vdev, &err);
3658     if (err != NULL) {
3659         error_propagate(errp, err);
3660         vdc->unrealize(dev);
3661         return;
3662     }
3663 
3664     vdev->listener.commit = virtio_memory_listener_commit;
3665     vdev->listener.name = "virtio";
3666     memory_listener_register(&vdev->listener, vdev->dma_as);
3667 }
3668 
3669 static void virtio_device_unrealize(DeviceState *dev)
3670 {
3671     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3672     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3673 
3674     memory_listener_unregister(&vdev->listener);
3675     virtio_bus_device_unplugged(vdev);
3676 
3677     if (vdc->unrealize != NULL) {
3678         vdc->unrealize(dev);
3679     }
3680 
3681     g_free(vdev->bus_name);
3682     vdev->bus_name = NULL;
3683 }
3684 
3685 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3686 {
3687     int i;
3688     if (!vdev->vq) {
3689         return;
3690     }
3691 
3692     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3693         if (vdev->vq[i].vring.num == 0) {
3694             break;
3695         }
3696         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3697     }
3698     g_free(vdev->vq);
3699 }
3700 
3701 static void virtio_device_instance_finalize(Object *obj)
3702 {
3703     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3704 
3705     virtio_device_free_virtqueues(vdev);
3706 
3707     g_free(vdev->config);
3708     g_free(vdev->vector_queues);
3709 }
3710 
3711 static Property virtio_properties[] = {
3712     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3713     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3714     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3715     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3716                      disable_legacy_check, false),
3717     DEFINE_PROP_END_OF_LIST(),
3718 };
3719 
3720 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3721 {
3722     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3723     int i, n, r, err;
3724 
3725     /*
3726      * Batch all the host notifiers in a single transaction to avoid
3727      * quadratic time complexity in address_space_update_ioeventfds().
3728      */
3729     memory_region_transaction_begin();
3730     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3731         VirtQueue *vq = &vdev->vq[n];
3732         if (!virtio_queue_get_num(vdev, n)) {
3733             continue;
3734         }
3735         r = virtio_bus_set_host_notifier(qbus, n, true);
3736         if (r < 0) {
3737             err = r;
3738             goto assign_error;
3739         }
3740         event_notifier_set_handler(&vq->host_notifier,
3741                                    virtio_queue_host_notifier_read);
3742     }
3743 
3744     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3745         /* Kick right away to begin processing requests already in vring */
3746         VirtQueue *vq = &vdev->vq[n];
3747         if (!vq->vring.num) {
3748             continue;
3749         }
3750         event_notifier_set(&vq->host_notifier);
3751     }
3752     memory_region_transaction_commit();
3753     return 0;
3754 
3755 assign_error:
3756     i = n; /* save n for a second iteration after transaction is committed. */
3757     while (--n >= 0) {
3758         VirtQueue *vq = &vdev->vq[n];
3759         if (!virtio_queue_get_num(vdev, n)) {
3760             continue;
3761         }
3762 
3763         event_notifier_set_handler(&vq->host_notifier, NULL);
3764         r = virtio_bus_set_host_notifier(qbus, n, false);
3765         assert(r >= 0);
3766     }
3767     /*
3768      * The transaction expects the ioeventfds to be open when it
3769      * commits. Do it now, before the cleanup loop.
3770      */
3771     memory_region_transaction_commit();
3772 
3773     while (--i >= 0) {
3774         if (!virtio_queue_get_num(vdev, i)) {
3775             continue;
3776         }
3777         virtio_bus_cleanup_host_notifier(qbus, i);
3778     }
3779     return err;
3780 }
3781 
3782 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3783 {
3784     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3785     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3786 
3787     return virtio_bus_start_ioeventfd(vbus);
3788 }
3789 
3790 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3791 {
3792     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3793     int n, r;
3794 
3795     /*
3796      * Batch all the host notifiers in a single transaction to avoid
3797      * quadratic time complexity in address_space_update_ioeventfds().
3798      */
3799     memory_region_transaction_begin();
3800     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3801         VirtQueue *vq = &vdev->vq[n];
3802 
3803         if (!virtio_queue_get_num(vdev, n)) {
3804             continue;
3805         }
3806         event_notifier_set_handler(&vq->host_notifier, NULL);
3807         r = virtio_bus_set_host_notifier(qbus, n, false);
3808         assert(r >= 0);
3809     }
3810     /*
3811      * The transaction expects the ioeventfds to be open when it
3812      * commits. Do it now, before the cleanup loop.
3813      */
3814     memory_region_transaction_commit();
3815 
3816     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3817         if (!virtio_queue_get_num(vdev, n)) {
3818             continue;
3819         }
3820         virtio_bus_cleanup_host_notifier(qbus, n);
3821     }
3822 }
3823 
3824 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3825 {
3826     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3827     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3828 
3829     return virtio_bus_grab_ioeventfd(vbus);
3830 }
3831 
3832 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3833 {
3834     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3835     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3836 
3837     virtio_bus_release_ioeventfd(vbus);
3838 }
3839 
3840 static void virtio_device_class_init(ObjectClass *klass, void *data)
3841 {
3842     /* Set the default value here. */
3843     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3844     DeviceClass *dc = DEVICE_CLASS(klass);
3845 
3846     dc->realize = virtio_device_realize;
3847     dc->unrealize = virtio_device_unrealize;
3848     dc->bus_type = TYPE_VIRTIO_BUS;
3849     device_class_set_props(dc, virtio_properties);
3850     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3851     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3852 
3853     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3854 }
3855 
3856 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3857 {
3858     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3859     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3860 
3861     return virtio_bus_ioeventfd_enabled(vbus);
3862 }
3863 
3864 static const TypeInfo virtio_device_info = {
3865     .name = TYPE_VIRTIO_DEVICE,
3866     .parent = TYPE_DEVICE,
3867     .instance_size = sizeof(VirtIODevice),
3868     .class_init = virtio_device_class_init,
3869     .instance_finalize = virtio_device_instance_finalize,
3870     .abstract = true,
3871     .class_size = sizeof(VirtioDeviceClass),
3872 };
3873 
3874 static void virtio_register_types(void)
3875 {
3876     type_register_static(&virtio_device_info);
3877 }
3878 
3879 type_init(virtio_register_types)
3880