xref: /openbmc/qemu/hw/virtio/virtio.c (revision 1580b897)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
/*
 * The alignment to use between consumer and producer parts of vring.
 * The value is the x86 page size. This is the default, used by transports
 * like PCI which don't provide a means for the guest to tell the host the
 * alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
38 
/*
 * One entry of a split-ring descriptor table.  vring_split_desc_read()
 * copies an entry out of the descriptor region cache and converts every
 * field with the virtio_tswap*() helpers.
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* buffer address */
    uint32_t len;    /* buffer length */
    uint16_t flags;  /* VRING_DESC_F_* bits (e.g. NEXT, INDIRECT) */
    uint16_t next;   /* index of the next descriptor when F_NEXT is set */
} VRingDesc;
46 
/*
 * One entry of a packed-ring descriptor area.  Fields are read and written
 * individually (by offset) through the vring_packed_desc_*() helpers.
 */
typedef struct VRingPackedDesc {
    uint64_t addr;   /* buffer address */
    uint32_t len;    /* buffer length */
    uint16_t id;     /* element identifier written back when marked used */
    uint16_t flags;  /* carries the VRING_PACKED_DESC_F_AVAIL/USED bits */
} VRingPackedDesc;
53 
/*
 * Layout of the split-ring available ring.  Accessed by offset through the
 * vring_avail_*() helpers; the slot at ring[vring.num] is read by
 * vring_get_used_event().
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;     /* compared against last_avail_idx to detect new heads */
    uint16_t ring[];  /* head descriptor indices */
} VRingAvail;
60 
/*
 * One entry of the split-ring used ring, as written by vring_used_write().
 */
typedef struct VRingUsedElem
{
    uint32_t id;   /* index of the completed element (elem->index) */
    uint32_t len;  /* length reported for the element */
} VRingUsedElem;
66 
/*
 * Layout of the split-ring used ring.  Accessed by offset through the
 * vring_used_*() helpers; the slot at ring[vring.num] is where
 * vring_set_avail_event() stores the avail event.
 */
typedef struct VRingUsed
{
    uint16_t flags;        /* e.g. VRING_USED_F_NO_NOTIFY */
    uint16_t idx;          /* used index, updated by vring_used_idx_set() */
    VRingUsedElem ring[];
} VRingUsed;
73 
/*
 * The three region caches of one virtqueue, allocated and replaced as a
 * unit.  Old instances are released via call_rcu() using the embedded
 * rcu head, with virtio_free_region_cache() as the callback.
 */
typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;   /* descriptor table / packed descriptor area */
    MemoryRegionCache avail;  /* available ring */
    MemoryRegionCache used;   /* used ring */
} VRingMemoryRegionCaches;
80 
/*
 * Geometry of one virtqueue ring: its size, alignment, the addresses of the
 * three ring parts and the RCU-published caches that map them.
 */
typedef struct VRing
{
    unsigned int num;          /* number of ring entries */
    unsigned int num_default;  /* presumably the initial value of num - TODO confirm */
    unsigned int align;        /* alignment applied to the used ring address */
    hwaddr desc;               /* 0 means the ring is not set up */
    hwaddr avail;
    hwaddr used;
    /* Read with qatomic_rcu_read(), replaced with qatomic_rcu_set() */
    VRingMemoryRegionCaches *caches;
} VRing;
91 
/*
 * Packed-ring event structure.  virtio_queue_packed_set_notification()
 * writes off_wrap as (avail index | wrap counter << 15) and flags as one of
 * the VRING_PACKED_EVENT_FLAG_* values.
 */
typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent ;
96 
/*
 * Device-side state of a single virtqueue: ring geometry and caches,
 * progress indices for both ring formats, notification bookkeeping and the
 * handlers/notifiers attached to the queue.
 */
struct VirtQueue
{
    VRing vring;
    /* Per-element records stored by virtqueue_packed_fill() until flush */
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    /* Packed ring: wrap counter that goes with last_avail_idx */
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    /* Next used position to publish, and its packed-ring wrap counter */
    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used can still be trusted (cleared on flush wrap) */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* In-flight count; decremented on flush, detach and rewind */
    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleAIOOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};
135 
136 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
137 {
138     if (!caches) {
139         return;
140     }
141 
142     address_space_cache_destroy(&caches->desc);
143     address_space_cache_destroy(&caches->avail);
144     address_space_cache_destroy(&caches->used);
145     g_free(caches);
146 }
147 
148 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
149 {
150     VRingMemoryRegionCaches *caches;
151 
152     caches = qatomic_read(&vq->vring.caches);
153     qatomic_rcu_set(&vq->vring.caches, NULL);
154     if (caches) {
155         call_rcu(caches, virtio_free_region_cache, rcu);
156     }
157 }
158 
159 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
160 {
161     VirtQueue *vq = &vdev->vq[n];
162     VRingMemoryRegionCaches *old = vq->vring.caches;
163     VRingMemoryRegionCaches *new = NULL;
164     hwaddr addr, size;
165     int64_t len;
166     bool packed;
167 
168 
169     addr = vq->vring.desc;
170     if (!addr) {
171         goto out_no_cache;
172     }
173     new = g_new0(VRingMemoryRegionCaches, 1);
174     size = virtio_queue_get_desc_size(vdev, n);
175     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
176                                    true : false;
177     len = address_space_cache_init(&new->desc, vdev->dma_as,
178                                    addr, size, packed);
179     if (len < size) {
180         virtio_error(vdev, "Cannot map desc");
181         goto err_desc;
182     }
183 
184     size = virtio_queue_get_used_size(vdev, n);
185     len = address_space_cache_init(&new->used, vdev->dma_as,
186                                    vq->vring.used, size, true);
187     if (len < size) {
188         virtio_error(vdev, "Cannot map used");
189         goto err_used;
190     }
191 
192     size = virtio_queue_get_avail_size(vdev, n);
193     len = address_space_cache_init(&new->avail, vdev->dma_as,
194                                    vq->vring.avail, size, false);
195     if (len < size) {
196         virtio_error(vdev, "Cannot map avail");
197         goto err_avail;
198     }
199 
200     qatomic_rcu_set(&vq->vring.caches, new);
201     if (old) {
202         call_rcu(old, virtio_free_region_cache, rcu);
203     }
204     return;
205 
206 err_avail:
207     address_space_cache_destroy(&new->avail);
208 err_used:
209     address_space_cache_destroy(&new->used);
210 err_desc:
211     address_space_cache_destroy(&new->desc);
212 out_no_cache:
213     g_free(new);
214     virtio_virtqueue_reset_region_cache(vq);
215 }
216 
217 /* virt queue functions */
218 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
219 {
220     VRing *vring = &vdev->vq[n].vring;
221 
222     if (!vring->num || !vring->desc || !vring->align) {
223         /* not yet setup -> nothing to do */
224         return;
225     }
226     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
227     vring->used = vring_align(vring->avail +
228                               offsetof(VRingAvail, ring[vring->num]),
229                               vring->align);
230     virtio_init_region_cache(vdev, n);
231 }
232 
233 /* Called within rcu_read_lock().  */
234 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
235                                   MemoryRegionCache *cache, int i)
236 {
237     address_space_read_cached(cache, i * sizeof(VRingDesc),
238                               desc, sizeof(VRingDesc));
239     virtio_tswap64s(vdev, &desc->addr);
240     virtio_tswap32s(vdev, &desc->len);
241     virtio_tswap16s(vdev, &desc->flags);
242     virtio_tswap16s(vdev, &desc->next);
243 }
244 
/*
 * Read the VRingPackedDescEvent stored at the start of @cache into @e.
 * The flags field is fetched first and off_wrap second, with a read barrier
 * in between; both are then converted with virtio_tswap16s().
 */
static void vring_packed_event_read(VirtIODevice *vdev,
                                    MemoryRegionCache *cache,
                                    VRingPackedDescEvent *e)
{
    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);

    address_space_read_cached(cache, off_flags, &e->flags,
                              sizeof(e->flags));
    /* Make sure flags is seen before off_wrap */
    smp_rmb();
    address_space_read_cached(cache, off_off, &e->off_wrap,
                              sizeof(e->off_wrap));
    virtio_tswap16s(vdev, &e->off_wrap);
    virtio_tswap16s(vdev, &e->flags);
}
261 
262 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
263                                         MemoryRegionCache *cache,
264                                         uint16_t off_wrap)
265 {
266     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
267 
268     virtio_tswap16s(vdev, &off_wrap);
269     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
270     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
271 }
272 
273 static void vring_packed_flags_write(VirtIODevice *vdev,
274                                      MemoryRegionCache *cache, uint16_t flags)
275 {
276     hwaddr off = offsetof(VRingPackedDescEvent, flags);
277 
278     virtio_tswap16s(vdev, &flags);
279     address_space_write_cached(cache, off, &flags, sizeof(flags));
280     address_space_cache_invalidate(cache, off, sizeof(flags));
281 }
282 
283 /* Called within rcu_read_lock().  */
284 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
285 {
286     return qatomic_rcu_read(&vq->vring.caches);
287 }
288 
289 /* Called within rcu_read_lock().  */
290 static inline uint16_t vring_avail_flags(VirtQueue *vq)
291 {
292     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
293     hwaddr pa = offsetof(VRingAvail, flags);
294 
295     if (!caches) {
296         return 0;
297     }
298 
299     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
300 }
301 
302 /* Called within rcu_read_lock().  */
303 static inline uint16_t vring_avail_idx(VirtQueue *vq)
304 {
305     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
306     hwaddr pa = offsetof(VRingAvail, idx);
307 
308     if (!caches) {
309         return 0;
310     }
311 
312     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
313     return vq->shadow_avail_idx;
314 }
315 
316 /* Called within rcu_read_lock().  */
317 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
318 {
319     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
320     hwaddr pa = offsetof(VRingAvail, ring[i]);
321 
322     if (!caches) {
323         return 0;
324     }
325 
326     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
327 }
328 
329 /* Called within rcu_read_lock().  */
330 static inline uint16_t vring_get_used_event(VirtQueue *vq)
331 {
332     return vring_avail_ring(vq, vq->vring.num);
333 }
334 
335 /* Called within rcu_read_lock().  */
336 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
337                                     int i)
338 {
339     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
340     hwaddr pa = offsetof(VRingUsed, ring[i]);
341 
342     if (!caches) {
343         return;
344     }
345 
346     virtio_tswap32s(vq->vdev, &uelem->id);
347     virtio_tswap32s(vq->vdev, &uelem->len);
348     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
349     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
350 }
351 
352 /* Called within rcu_read_lock().  */
353 static uint16_t vring_used_idx(VirtQueue *vq)
354 {
355     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
356     hwaddr pa = offsetof(VRingUsed, idx);
357 
358     if (!caches) {
359         return 0;
360     }
361 
362     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
363 }
364 
365 /* Called within rcu_read_lock().  */
366 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
367 {
368     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
369     hwaddr pa = offsetof(VRingUsed, idx);
370 
371     if (caches) {
372         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
373         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
374     }
375 
376     vq->used_idx = val;
377 }
378 
379 /* Called within rcu_read_lock().  */
380 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
381 {
382     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
383     VirtIODevice *vdev = vq->vdev;
384     hwaddr pa = offsetof(VRingUsed, flags);
385     uint16_t flags;
386 
387     if (!caches) {
388         return;
389     }
390 
391     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
392     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
393     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
394 }
395 
396 /* Called within rcu_read_lock().  */
397 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
398 {
399     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
400     VirtIODevice *vdev = vq->vdev;
401     hwaddr pa = offsetof(VRingUsed, flags);
402     uint16_t flags;
403 
404     if (!caches) {
405         return;
406     }
407 
408     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
409     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
410     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
411 }
412 
413 /* Called within rcu_read_lock().  */
414 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
415 {
416     VRingMemoryRegionCaches *caches;
417     hwaddr pa;
418     if (!vq->notification) {
419         return;
420     }
421 
422     caches = vring_get_region_caches(vq);
423     if (!caches) {
424         return;
425     }
426 
427     pa = offsetof(VRingUsed, ring[vq->vring.num]);
428     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
429     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
430 }
431 
/*
 * Apply the notification setting of a split virtqueue to the ring.
 *
 * With VIRTIO_RING_F_EVENT_IDX the avail event is set to the current avail
 * index (vring_set_avail_event() itself does nothing while
 * vq->notification is false); otherwise VRING_USED_F_NO_NOTIFY in the used
 * flags is cleared or set according to @enable.  When enabling, a full
 * memory barrier follows the update.
 */
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    /* Held until the function returns. */
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
448 
/*
 * Apply the notification setting of a packed virtqueue by updating the
 * VRingPackedDescEvent held in the used region cache.
 *
 * - disabled:                      flags = VRING_PACKED_EVENT_FLAG_DISABLE
 * - enabled, VIRTIO_RING_F_EVENT_IDX: off_wrap is written first (shadow
 *   avail index plus wrap counter in bit 15), then, after a write barrier,
 *   flags = VRING_PACKED_EVENT_FLAG_DESC
 * - enabled otherwise:             flags = VRING_PACKED_EVENT_FLAG_ENABLE
 *
 * Does nothing when the queue has no region caches.  When enabling, a full
 * memory barrier follows the flags write.
 */
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    /* NOTE(review): e.flags is overwritten on every path below. */
    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
481 
482 bool virtio_queue_get_notification(VirtQueue *vq)
483 {
484     return vq->notification;
485 }
486 
487 void virtio_queue_set_notification(VirtQueue *vq, int enable)
488 {
489     vq->notification = enable;
490 
491     if (!vq->vring.desc) {
492         return;
493     }
494 
495     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
496         virtio_queue_packed_set_notification(vq, enable);
497     } else {
498         virtio_queue_split_set_notification(vq, enable);
499     }
500 }
501 
502 int virtio_queue_ready(VirtQueue *vq)
503 {
504     return vq->vring.avail != 0;
505 }
506 
507 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
508                                          uint16_t *flags,
509                                          MemoryRegionCache *cache,
510                                          int i)
511 {
512     address_space_read_cached(cache,
513                               i * sizeof(VRingPackedDesc) +
514                               offsetof(VRingPackedDesc, flags),
515                               flags, sizeof(*flags));
516     virtio_tswap16s(vdev, flags);
517 }
518 
/*
 * Read packed descriptor @i from @cache into @desc.
 *
 * The flags field is always fetched first.  With @strict_order a read
 * barrier separates it from the reads of addr, id and len.  All fields end
 * up converted with the virtio_tswap*() helpers.
 */
static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the rest of the fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}
543 
544 static void vring_packed_desc_write_data(VirtIODevice *vdev,
545                                          VRingPackedDesc *desc,
546                                          MemoryRegionCache *cache,
547                                          int i)
548 {
549     hwaddr off_id = i * sizeof(VRingPackedDesc) +
550                     offsetof(VRingPackedDesc, id);
551     hwaddr off_len = i * sizeof(VRingPackedDesc) +
552                     offsetof(VRingPackedDesc, len);
553 
554     virtio_tswap32s(vdev, &desc->len);
555     virtio_tswap16s(vdev, &desc->id);
556     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
557     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
558     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
559     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
560 }
561 
562 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
563                                           VRingPackedDesc *desc,
564                                           MemoryRegionCache *cache,
565                                           int i)
566 {
567     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
568 
569     virtio_tswap16s(vdev, &desc->flags);
570     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
571     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
572 }
573 
/*
 * Write @desc into packed descriptor @i of @cache: id and len first, flags
 * last.  With @strict_order a write barrier is issued between the two
 * steps.  @desc is byte-swapped in place by the helpers it calls.
 */
static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}
586 
587 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
588 {
589     bool avail, used;
590 
591     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
592     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
593     return (avail != used) && (avail == wrap_counter);
594 }
595 
596 /* Fetch avail_idx from VQ memory only when we really need to know if
597  * guest has added some buffers.
598  * Called within rcu_read_lock().  */
599 static int virtio_queue_empty_rcu(VirtQueue *vq)
600 {
601     if (virtio_device_disabled(vq->vdev)) {
602         return 1;
603     }
604 
605     if (unlikely(!vq->vring.avail)) {
606         return 1;
607     }
608 
609     if (vq->shadow_avail_idx != vq->last_avail_idx) {
610         return 0;
611     }
612 
613     return vring_avail_idx(vq) == vq->last_avail_idx;
614 }
615 
616 static int virtio_queue_split_empty(VirtQueue *vq)
617 {
618     bool empty;
619 
620     if (virtio_device_disabled(vq->vdev)) {
621         return 1;
622     }
623 
624     if (unlikely(!vq->vring.avail)) {
625         return 1;
626     }
627 
628     if (vq->shadow_avail_idx != vq->last_avail_idx) {
629         return 0;
630     }
631 
632     RCU_READ_LOCK_GUARD();
633     empty = vring_avail_idx(vq) == vq->last_avail_idx;
634     return empty;
635 }
636 
637 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
638 {
639     struct VRingPackedDesc desc;
640     VRingMemoryRegionCaches *cache;
641 
642     if (unlikely(!vq->vring.desc)) {
643         return 1;
644     }
645 
646     cache = vring_get_region_caches(vq);
647     if (!cache) {
648         return 1;
649     }
650 
651     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
652                                  vq->last_avail_idx);
653 
654     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
655 }
656 
657 static int virtio_queue_packed_empty(VirtQueue *vq)
658 {
659     RCU_READ_LOCK_GUARD();
660     return virtio_queue_packed_empty_rcu(vq);
661 }
662 
663 int virtio_queue_empty(VirtQueue *vq)
664 {
665     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
666         return virtio_queue_packed_empty(vq);
667     } else {
668         return virtio_queue_split_empty(vq);
669     }
670 }
671 
672 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
673                                unsigned int len)
674 {
675     AddressSpace *dma_as = vq->vdev->dma_as;
676     unsigned int offset;
677     int i;
678 
679     offset = 0;
680     for (i = 0; i < elem->in_num; i++) {
681         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
682 
683         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
684                          elem->in_sg[i].iov_len,
685                          DMA_DIRECTION_FROM_DEVICE, size);
686 
687         offset += size;
688     }
689 
690     for (i = 0; i < elem->out_num; i++)
691         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
692                          elem->out_sg[i].iov_len,
693                          DMA_DIRECTION_TO_DEVICE,
694                          elem->out_sg[i].iov_len);
695 }
696 
697 /* virtqueue_detach_element:
698  * @vq: The #VirtQueue
699  * @elem: The #VirtQueueElement
700  * @len: number of bytes written
701  *
702  * Detach the element from the virtqueue.  This function is suitable for device
703  * reset or other situations where a #VirtQueueElement is simply freed and will
704  * not be pushed or discarded.
705  */
706 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
707                               unsigned int len)
708 {
709     vq->inuse -= elem->ndescs;
710     virtqueue_unmap_sg(vq, elem, len);
711 }
712 
713 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
714 {
715     vq->last_avail_idx -= num;
716 }
717 
718 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
719 {
720     if (vq->last_avail_idx < num) {
721         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
722         vq->last_avail_wrap_counter ^= 1;
723     } else {
724         vq->last_avail_idx -= num;
725     }
726 }
727 
728 /* virtqueue_unpop:
729  * @vq: The #VirtQueue
730  * @elem: The #VirtQueueElement
731  * @len: number of bytes written
732  *
733  * Pretend the most recent element wasn't popped from the virtqueue.  The next
734  * call to virtqueue_pop() will refetch the element.
735  */
736 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
737                      unsigned int len)
738 {
739 
740     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
741         virtqueue_packed_rewind(vq, 1);
742     } else {
743         virtqueue_split_rewind(vq, 1);
744     }
745 
746     virtqueue_detach_element(vq, elem, len);
747 }
748 
749 /* virtqueue_rewind:
750  * @vq: The #VirtQueue
751  * @num: Number of elements to push back
752  *
753  * Pretend that elements weren't popped from the virtqueue.  The next
754  * virtqueue_pop() will refetch the oldest element.
755  *
756  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
757  *
758  * Returns: true on success, false if @num is greater than the number of in use
759  * elements.
760  */
761 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
762 {
763     if (num > vq->inuse) {
764         return false;
765     }
766 
767     vq->inuse -= num;
768     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
769         virtqueue_packed_rewind(vq, num);
770     } else {
771         virtqueue_split_rewind(vq, num);
772     }
773     return true;
774 }
775 
776 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
777                     unsigned int len, unsigned int idx)
778 {
779     VRingUsedElem uelem;
780 
781     if (unlikely(!vq->vring.used)) {
782         return;
783     }
784 
785     idx = (idx + vq->used_idx) % vq->vring.num;
786 
787     uelem.id = elem->index;
788     uelem.len = len;
789     vring_used_write(vq, &uelem, idx);
790 }
791 
792 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
793                                   unsigned int len, unsigned int idx)
794 {
795     vq->used_elems[idx].index = elem->index;
796     vq->used_elems[idx].len = len;
797     vq->used_elems[idx].ndescs = elem->ndescs;
798 }
799 
800 static void virtqueue_packed_fill_desc(VirtQueue *vq,
801                                        const VirtQueueElement *elem,
802                                        unsigned int idx,
803                                        bool strict_order)
804 {
805     uint16_t head;
806     VRingMemoryRegionCaches *caches;
807     VRingPackedDesc desc = {
808         .id = elem->index,
809         .len = elem->len,
810     };
811     bool wrap_counter = vq->used_wrap_counter;
812 
813     if (unlikely(!vq->vring.desc)) {
814         return;
815     }
816 
817     head = vq->used_idx + idx;
818     if (head >= vq->vring.num) {
819         head -= vq->vring.num;
820         wrap_counter ^= 1;
821     }
822     if (wrap_counter) {
823         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
824         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
825     } else {
826         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
827         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
828     }
829 
830     caches = vring_get_region_caches(vq);
831     if (!caches) {
832         return;
833     }
834 
835     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
836 }
837 
838 /* Called within rcu_read_lock().  */
839 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
840                     unsigned int len, unsigned int idx)
841 {
842     trace_virtqueue_fill(vq, elem, len, idx);
843 
844     virtqueue_unmap_sg(vq, elem, len);
845 
846     if (virtio_device_disabled(vq->vdev)) {
847         return;
848     }
849 
850     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
851         virtqueue_packed_fill(vq, elem, len, idx);
852     } else {
853         virtqueue_split_fill(vq, elem, len, idx);
854     }
855 }
856 
/*
 * Publish @count previously filled entries of a split ring: advance the
 * used index by @count and reduce vq->inuse accordingly.  Does nothing if
 * the queue has no used ring address.
 * Called within rcu_read_lock().
 */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    /*
     * Invalidate signalled_used when this update moved the used index past
     * it (16-bit wrap-aware comparison).
     */
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}
876 
/*
 * Publish the first @count entries of vq->used_elems to a packed ring.
 *
 * Descriptors for entries 1..@count-1 are written first without ordering;
 * entry 0 is written last with strict ordering (presumably so the guest
 * only sees the batch once its first descriptor becomes visible - TODO
 * confirm against the virtio specification).  Afterwards inuse and
 * used_idx are adjusted by the total descriptor count, and used_idx wraps
 * at vring.num, flipping used_wrap_counter.
 */
static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int i, ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    for (i = 1; i < count; i++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
        ndescs += vq->used_elems[i].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
    ndescs += vq->used_elems[0].ndescs;

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
    }
}
899 
900 void virtqueue_flush(VirtQueue *vq, unsigned int count)
901 {
902     if (virtio_device_disabled(vq->vdev)) {
903         vq->inuse -= count;
904         return;
905     }
906 
907     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
908         virtqueue_packed_flush(vq, count);
909     } else {
910         virtqueue_split_flush(vq, count);
911     }
912 }
913 
914 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len)
916 {
917     RCU_READ_LOCK_GUARD();
918     virtqueue_fill(vq, elem, len, 0);
919     virtqueue_flush(vq, 1);
920 }
921 
922 /* Called within rcu_read_lock().  */
923 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
924 {
925     uint16_t num_heads = vring_avail_idx(vq) - idx;
926 
927     /* Check it isn't doing very strange things with descriptor numbers. */
928     if (num_heads > vq->vring.num) {
929         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
930                      idx, vq->shadow_avail_idx);
931         return -EINVAL;
932     }
933     /* On success, callers read a descriptor at vq->last_avail_idx.
934      * Make sure descriptor read does not bypass avail index read. */
935     if (num_heads) {
936         smp_rmb();
937     }
938 
939     return num_heads;
940 }
941 
942 /* Called within rcu_read_lock().  */
943 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
944                                unsigned int *head)
945 {
946     /* Grab the next descriptor number they're advertising, and increment
947      * the index we've seen. */
948     *head = vring_avail_ring(vq, idx % vq->vring.num);
949 
950     /* If their number is silly, that's a fatal mistake. */
951     if (*head >= vq->vring.num) {
952         virtio_error(vq->vdev, "Guest says index %u is available", *head);
953         return false;
954     }
955 
956     return true;
957 }
958 
/* Result codes of virtqueue_split_read_next_desc(). */
enum {
    /* the chain is malformed */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
964 
/*
 * Follow the chain link of split descriptor @desc.
 *
 * Returns VIRTQUEUE_READ_DESC_DONE if @desc does not have VRING_DESC_F_NEXT
 * set (neither @desc nor *@next is modified).  Otherwise *@next is set to
 * desc->next; if that is not below @max a device error is reported and
 * VIRTQUEUE_READ_DESC_ERROR is returned, else descriptor *@next is read
 * from @desc_cache into @desc and VIRTQUEUE_READ_DESC_MORE is returned.
 */
static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    /*
     * NOTE(review): the comment above asks for a compiler barrier, while
     * smp_wmb() is a write memory barrier - confirm this is intended.
     */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}
987 
988 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
989                             unsigned int *in_bytes, unsigned int *out_bytes,
990                             unsigned max_in_bytes, unsigned max_out_bytes)
991 {
992     VirtIODevice *vdev = vq->vdev;
993     unsigned int max, idx;
994     unsigned int total_bufs, in_total, out_total;
995     VRingMemoryRegionCaches *caches;
996     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
997     int64_t len = 0;
998     int rc;
999 
1000     RCU_READ_LOCK_GUARD();
1001 
1002     idx = vq->last_avail_idx;
1003     total_bufs = in_total = out_total = 0;
1004 
1005     max = vq->vring.num;
1006     caches = vring_get_region_caches(vq);
1007     if (!caches) {
1008         goto err;
1009     }
1010 
1011     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1012         MemoryRegionCache *desc_cache = &caches->desc;
1013         unsigned int num_bufs;
1014         VRingDesc desc;
1015         unsigned int i;
1016 
1017         num_bufs = total_bufs;
1018 
1019         if (!virtqueue_get_head(vq, idx++, &i)) {
1020             goto err;
1021         }
1022 
1023         vring_split_desc_read(vdev, &desc, desc_cache, i);
1024 
1025         if (desc.flags & VRING_DESC_F_INDIRECT) {
1026             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1027                 virtio_error(vdev, "Invalid size for indirect buffer table");
1028                 goto err;
1029             }
1030 
1031             /* If we've got too many, that implies a descriptor loop. */
1032             if (num_bufs >= max) {
1033                 virtio_error(vdev, "Looped descriptor");
1034                 goto err;
1035             }
1036 
1037             /* loop over the indirect descriptor table */
1038             len = address_space_cache_init(&indirect_desc_cache,
1039                                            vdev->dma_as,
1040                                            desc.addr, desc.len, false);
1041             desc_cache = &indirect_desc_cache;
1042             if (len < desc.len) {
1043                 virtio_error(vdev, "Cannot map indirect buffer");
1044                 goto err;
1045             }
1046 
1047             max = desc.len / sizeof(VRingDesc);
1048             num_bufs = i = 0;
1049             vring_split_desc_read(vdev, &desc, desc_cache, i);
1050         }
1051 
1052         do {
1053             /* If we've got too many, that implies a descriptor loop. */
1054             if (++num_bufs > max) {
1055                 virtio_error(vdev, "Looped descriptor");
1056                 goto err;
1057             }
1058 
1059             if (desc.flags & VRING_DESC_F_WRITE) {
1060                 in_total += desc.len;
1061             } else {
1062                 out_total += desc.len;
1063             }
1064             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1065                 goto done;
1066             }
1067 
1068             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1069         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1070 
1071         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1072             goto err;
1073         }
1074 
1075         if (desc_cache == &indirect_desc_cache) {
1076             address_space_cache_destroy(&indirect_desc_cache);
1077             total_bufs++;
1078         } else {
1079             total_bufs = num_bufs;
1080         }
1081     }
1082 
1083     if (rc < 0) {
1084         goto err;
1085     }
1086 
1087 done:
1088     address_space_cache_destroy(&indirect_desc_cache);
1089     if (in_bytes) {
1090         *in_bytes = in_total;
1091     }
1092     if (out_bytes) {
1093         *out_bytes = out_total;
1094     }
1095     return;
1096 
1097 err:
1098     in_total = out_total = 0;
1099     goto done;
1100 }
1101 
1102 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1103                                            VRingPackedDesc *desc,
1104                                            MemoryRegionCache
1105                                            *desc_cache,
1106                                            unsigned int max,
1107                                            unsigned int *next,
1108                                            bool indirect)
1109 {
1110     /* If this descriptor says it doesn't chain, we're done. */
1111     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1112         return VIRTQUEUE_READ_DESC_DONE;
1113     }
1114 
1115     ++*next;
1116     if (*next == max) {
1117         if (indirect) {
1118             return VIRTQUEUE_READ_DESC_DONE;
1119         } else {
1120             (*next) -= vq->vring.num;
1121         }
1122     }
1123 
1124     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1125     return VIRTQUEUE_READ_DESC_MORE;
1126 }
1127 
1128 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1129                                              unsigned int *in_bytes,
1130                                              unsigned int *out_bytes,
1131                                              unsigned max_in_bytes,
1132                                              unsigned max_out_bytes)
1133 {
1134     VirtIODevice *vdev = vq->vdev;
1135     unsigned int max, idx;
1136     unsigned int total_bufs, in_total, out_total;
1137     MemoryRegionCache *desc_cache;
1138     VRingMemoryRegionCaches *caches;
1139     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1140     int64_t len = 0;
1141     VRingPackedDesc desc;
1142     bool wrap_counter;
1143 
1144     RCU_READ_LOCK_GUARD();
1145     idx = vq->last_avail_idx;
1146     wrap_counter = vq->last_avail_wrap_counter;
1147     total_bufs = in_total = out_total = 0;
1148 
1149     max = vq->vring.num;
1150     caches = vring_get_region_caches(vq);
1151     if (!caches) {
1152         goto err;
1153     }
1154 
1155     for (;;) {
1156         unsigned int num_bufs = total_bufs;
1157         unsigned int i = idx;
1158         int rc;
1159 
1160         desc_cache = &caches->desc;
1161         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1162         if (!is_desc_avail(desc.flags, wrap_counter)) {
1163             break;
1164         }
1165 
1166         if (desc.flags & VRING_DESC_F_INDIRECT) {
1167             if (desc.len % sizeof(VRingPackedDesc)) {
1168                 virtio_error(vdev, "Invalid size for indirect buffer table");
1169                 goto err;
1170             }
1171 
1172             /* If we've got too many, that implies a descriptor loop. */
1173             if (num_bufs >= max) {
1174                 virtio_error(vdev, "Looped descriptor");
1175                 goto err;
1176             }
1177 
1178             /* loop over the indirect descriptor table */
1179             len = address_space_cache_init(&indirect_desc_cache,
1180                                            vdev->dma_as,
1181                                            desc.addr, desc.len, false);
1182             desc_cache = &indirect_desc_cache;
1183             if (len < desc.len) {
1184                 virtio_error(vdev, "Cannot map indirect buffer");
1185                 goto err;
1186             }
1187 
1188             max = desc.len / sizeof(VRingPackedDesc);
1189             num_bufs = i = 0;
1190             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1191         }
1192 
1193         do {
1194             /* If we've got too many, that implies a descriptor loop. */
1195             if (++num_bufs > max) {
1196                 virtio_error(vdev, "Looped descriptor");
1197                 goto err;
1198             }
1199 
1200             if (desc.flags & VRING_DESC_F_WRITE) {
1201                 in_total += desc.len;
1202             } else {
1203                 out_total += desc.len;
1204             }
1205             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1206                 goto done;
1207             }
1208 
1209             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1210                                                  &i, desc_cache ==
1211                                                  &indirect_desc_cache);
1212         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1213 
1214         if (desc_cache == &indirect_desc_cache) {
1215             address_space_cache_destroy(&indirect_desc_cache);
1216             total_bufs++;
1217             idx++;
1218         } else {
1219             idx += num_bufs - total_bufs;
1220             total_bufs = num_bufs;
1221         }
1222 
1223         if (idx >= vq->vring.num) {
1224             idx -= vq->vring.num;
1225             wrap_counter ^= 1;
1226         }
1227     }
1228 
1229     /* Record the index and wrap counter for a kick we want */
1230     vq->shadow_avail_idx = idx;
1231     vq->shadow_avail_wrap_counter = wrap_counter;
1232 done:
1233     address_space_cache_destroy(&indirect_desc_cache);
1234     if (in_bytes) {
1235         *in_bytes = in_total;
1236     }
1237     if (out_bytes) {
1238         *out_bytes = out_total;
1239     }
1240     return;
1241 
1242 err:
1243     in_total = out_total = 0;
1244     goto done;
1245 }
1246 
1247 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1248                                unsigned int *out_bytes,
1249                                unsigned max_in_bytes, unsigned max_out_bytes)
1250 {
1251     uint16_t desc_size;
1252     VRingMemoryRegionCaches *caches;
1253 
1254     if (unlikely(!vq->vring.desc)) {
1255         goto err;
1256     }
1257 
1258     caches = vring_get_region_caches(vq);
1259     if (!caches) {
1260         goto err;
1261     }
1262 
1263     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1264                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1265     if (caches->desc.len < vq->vring.num * desc_size) {
1266         virtio_error(vq->vdev, "Cannot map descriptor ring");
1267         goto err;
1268     }
1269 
1270     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1271         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1272                                          max_in_bytes, max_out_bytes);
1273     } else {
1274         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1275                                         max_in_bytes, max_out_bytes);
1276     }
1277 
1278     return;
1279 err:
1280     if (in_bytes) {
1281         *in_bytes = 0;
1282     }
1283     if (out_bytes) {
1284         *out_bytes = 0;
1285     }
1286 }
1287 
1288 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1289                           unsigned int out_bytes)
1290 {
1291     unsigned int in_total, out_total;
1292 
1293     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1294     return in_bytes <= in_total && out_bytes <= out_total;
1295 }
1296 
1297 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1298                                hwaddr *addr, struct iovec *iov,
1299                                unsigned int max_num_sg, bool is_write,
1300                                hwaddr pa, size_t sz)
1301 {
1302     bool ok = false;
1303     unsigned num_sg = *p_num_sg;
1304     assert(num_sg <= max_num_sg);
1305 
1306     if (!sz) {
1307         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1308         goto out;
1309     }
1310 
1311     while (sz) {
1312         hwaddr len = sz;
1313 
1314         if (num_sg == max_num_sg) {
1315             virtio_error(vdev, "virtio: too many write descriptors in "
1316                                "indirect table");
1317             goto out;
1318         }
1319 
1320         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1321                                               is_write ?
1322                                               DMA_DIRECTION_FROM_DEVICE :
1323                                               DMA_DIRECTION_TO_DEVICE);
1324         if (!iov[num_sg].iov_base) {
1325             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1326             goto out;
1327         }
1328 
1329         iov[num_sg].iov_len = len;
1330         addr[num_sg] = pa;
1331 
1332         sz -= len;
1333         pa += len;
1334         num_sg++;
1335     }
1336     ok = true;
1337 
1338 out:
1339     *p_num_sg = num_sg;
1340     return ok;
1341 }
1342 
1343 /* Only used by error code paths before we have a VirtQueueElement (therefore
1344  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1345  * yet.
1346  */
1347 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1348                                     struct iovec *iov)
1349 {
1350     unsigned int i;
1351 
1352     for (i = 0; i < out_num + in_num; i++) {
1353         int is_write = i >= out_num;
1354 
1355         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1356         iov++;
1357     }
1358 }
1359 
1360 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1361                                 hwaddr *addr, unsigned int num_sg,
1362                                 bool is_write)
1363 {
1364     unsigned int i;
1365     hwaddr len;
1366 
1367     for (i = 0; i < num_sg; i++) {
1368         len = sg[i].iov_len;
1369         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1370                                         addr[i], &len, is_write ?
1371                                         DMA_DIRECTION_FROM_DEVICE :
1372                                         DMA_DIRECTION_TO_DEVICE);
1373         if (!sg[i].iov_base) {
1374             error_report("virtio: error trying to map MMIO memory");
1375             exit(1);
1376         }
1377         if (len != sg[i].iov_len) {
1378             error_report("virtio: unexpected memory split");
1379             exit(1);
1380         }
1381     }
1382 }
1383 
1384 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1385 {
1386     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1387     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1388                                                                         false);
1389 }
1390 
1391 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1392 {
1393     VirtQueueElement *elem;
1394     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1395     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1396     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1397     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1398     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1399     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1400 
1401     assert(sz >= sizeof(VirtQueueElement));
1402     elem = g_malloc(out_sg_end);
1403     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1404     elem->out_num = out_num;
1405     elem->in_num = in_num;
1406     elem->in_addr = (void *)elem + in_addr_ofs;
1407     elem->out_addr = (void *)elem + out_addr_ofs;
1408     elem->in_sg = (void *)elem + in_sg_ofs;
1409     elem->out_sg = (void *)elem + out_sg_ofs;
1410     return elem;
1411 }
1412 
/*
 * Take the next available element off a split virtqueue.
 *
 * @sz: size to allocate for the element header; passed on to
 *      virtqueue_alloc_element().
 *
 * Returns a newly allocated element whose buffers are mapped, or NULL
 * when the queue is empty or an error was reported via virtio_error().
 * Descriptors without VRING_DESC_F_WRITE must all precede those with it.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    /* Scratch arrays: out entries first, in entries start at out_num. */
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    /* Note: last_avail_idx is advanced even if the head turns out bad. */
    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* The chain is now bounded by the indirect table, not the ring. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* in entries share the arrays with, and follow, the out ones */
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* an out descriptor after an in descriptor is a guest error */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    /* a split-ring element is always accounted as one descriptor */
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Common exit: drop the indirect table mapping, if one was set up. */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    /* Unmap whatever was mapped so far; elem is still NULL here. */
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
1549 
1550 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1551 {
1552     unsigned int i, max;
1553     VRingMemoryRegionCaches *caches;
1554     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1555     MemoryRegionCache *desc_cache;
1556     int64_t len;
1557     VirtIODevice *vdev = vq->vdev;
1558     VirtQueueElement *elem = NULL;
1559     unsigned out_num, in_num, elem_entries;
1560     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1561     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1562     VRingPackedDesc desc;
1563     uint16_t id;
1564     int rc;
1565 
1566     RCU_READ_LOCK_GUARD();
1567     if (virtio_queue_packed_empty_rcu(vq)) {
1568         goto done;
1569     }
1570 
1571     /* When we start there are none of either input nor output. */
1572     out_num = in_num = elem_entries = 0;
1573 
1574     max = vq->vring.num;
1575 
1576     if (vq->inuse >= vq->vring.num) {
1577         virtio_error(vdev, "Virtqueue size exceeded");
1578         goto done;
1579     }
1580 
1581     i = vq->last_avail_idx;
1582 
1583     caches = vring_get_region_caches(vq);
1584     if (!caches) {
1585         virtio_error(vdev, "Region caches not initialized");
1586         goto done;
1587     }
1588 
1589     if (caches->desc.len < max * sizeof(VRingDesc)) {
1590         virtio_error(vdev, "Cannot map descriptor ring");
1591         goto done;
1592     }
1593 
1594     desc_cache = &caches->desc;
1595     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1596     id = desc.id;
1597     if (desc.flags & VRING_DESC_F_INDIRECT) {
1598         if (desc.len % sizeof(VRingPackedDesc)) {
1599             virtio_error(vdev, "Invalid size for indirect buffer table");
1600             goto done;
1601         }
1602 
1603         /* loop over the indirect descriptor table */
1604         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1605                                        desc.addr, desc.len, false);
1606         desc_cache = &indirect_desc_cache;
1607         if (len < desc.len) {
1608             virtio_error(vdev, "Cannot map indirect buffer");
1609             goto done;
1610         }
1611 
1612         max = desc.len / sizeof(VRingPackedDesc);
1613         i = 0;
1614         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1615     }
1616 
1617     /* Collect all the descriptors */
1618     do {
1619         bool map_ok;
1620 
1621         if (desc.flags & VRING_DESC_F_WRITE) {
1622             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1623                                         iov + out_num,
1624                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1625                                         desc.addr, desc.len);
1626         } else {
1627             if (in_num) {
1628                 virtio_error(vdev, "Incorrect order for descriptors");
1629                 goto err_undo_map;
1630             }
1631             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1632                                         VIRTQUEUE_MAX_SIZE, false,
1633                                         desc.addr, desc.len);
1634         }
1635         if (!map_ok) {
1636             goto err_undo_map;
1637         }
1638 
1639         /* If we've got too many, that implies a descriptor loop. */
1640         if (++elem_entries > max) {
1641             virtio_error(vdev, "Looped descriptor");
1642             goto err_undo_map;
1643         }
1644 
1645         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1646                                              desc_cache ==
1647                                              &indirect_desc_cache);
1648     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1649 
1650     /* Now copy what we have collected and mapped */
1651     elem = virtqueue_alloc_element(sz, out_num, in_num);
1652     for (i = 0; i < out_num; i++) {
1653         elem->out_addr[i] = addr[i];
1654         elem->out_sg[i] = iov[i];
1655     }
1656     for (i = 0; i < in_num; i++) {
1657         elem->in_addr[i] = addr[out_num + i];
1658         elem->in_sg[i] = iov[out_num + i];
1659     }
1660 
1661     elem->index = id;
1662     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1663     vq->last_avail_idx += elem->ndescs;
1664     vq->inuse += elem->ndescs;
1665 
1666     if (vq->last_avail_idx >= vq->vring.num) {
1667         vq->last_avail_idx -= vq->vring.num;
1668         vq->last_avail_wrap_counter ^= 1;
1669     }
1670 
1671     vq->shadow_avail_idx = vq->last_avail_idx;
1672     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1673 
1674     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1675 done:
1676     address_space_cache_destroy(&indirect_desc_cache);
1677 
1678     return elem;
1679 
1680 err_undo_map:
1681     virtqueue_undo_map_desc(out_num, in_num, iov);
1682     goto done;
1683 }
1684 
1685 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1686 {
1687     if (virtio_device_disabled(vq->vdev)) {
1688         return NULL;
1689     }
1690 
1691     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1692         return virtqueue_packed_pop(vq, sz);
1693     } else {
1694         return virtqueue_split_pop(vq, sz);
1695     }
1696 }
1697 
1698 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1699 {
1700     VRingMemoryRegionCaches *caches;
1701     MemoryRegionCache *desc_cache;
1702     unsigned int dropped = 0;
1703     VirtQueueElement elem = {};
1704     VirtIODevice *vdev = vq->vdev;
1705     VRingPackedDesc desc;
1706 
1707     caches = vring_get_region_caches(vq);
1708     if (!caches) {
1709         return 0;
1710     }
1711 
1712     desc_cache = &caches->desc;
1713 
1714     virtio_queue_set_notification(vq, 0);
1715 
1716     while (vq->inuse < vq->vring.num) {
1717         unsigned int idx = vq->last_avail_idx;
1718         /*
1719          * works similar to virtqueue_pop but does not map buffers
1720          * and does not allocate any memory.
1721          */
1722         vring_packed_desc_read(vdev, &desc, desc_cache,
1723                                vq->last_avail_idx , true);
1724         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1725             break;
1726         }
1727         elem.index = desc.id;
1728         elem.ndescs = 1;
1729         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1730                                                vq->vring.num, &idx, false)) {
1731             ++elem.ndescs;
1732         }
1733         /*
1734          * immediately push the element, nothing to unmap
1735          * as both in_num and out_num are set to 0.
1736          */
1737         virtqueue_push(vq, &elem, 0);
1738         dropped++;
1739         vq->last_avail_idx += elem.ndescs;
1740         if (vq->last_avail_idx >= vq->vring.num) {
1741             vq->last_avail_idx -= vq->vring.num;
1742             vq->last_avail_wrap_counter ^= 1;
1743         }
1744     }
1745 
1746     return dropped;
1747 }
1748 
1749 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1750 {
1751     unsigned int dropped = 0;
1752     VirtQueueElement elem = {};
1753     VirtIODevice *vdev = vq->vdev;
1754     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1755 
1756     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1757         /* works similar to virtqueue_pop but does not map buffers
1758         * and does not allocate any memory */
1759         smp_rmb();
1760         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1761             break;
1762         }
1763         vq->inuse++;
1764         vq->last_avail_idx++;
1765         if (fEventIdx) {
1766             vring_set_avail_event(vq, vq->last_avail_idx);
1767         }
1768         /* immediately push the element, nothing to unmap
1769          * as both in_num and out_num are set to 0 */
1770         virtqueue_push(vq, &elem, 0);
1771         dropped++;
1772     }
1773 
1774     return dropped;
1775 }
1776 
1777 /* virtqueue_drop_all:
1778  * @vq: The #VirtQueue
1779  * Drops all queued buffers and indicates them to the guest
1780  * as if they are done. Useful when buffers can not be
1781  * processed but must be returned to the guest.
1782  */
1783 unsigned int virtqueue_drop_all(VirtQueue *vq)
1784 {
1785     struct VirtIODevice *vdev = vq->vdev;
1786 
1787     if (virtio_device_disabled(vq->vdev)) {
1788         return 0;
1789     }
1790 
1791     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1792         return virtqueue_packed_drop_all(vq);
1793     } else {
1794         return virtqueue_split_drop_all(vq);
1795     }
1796 }
1797 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * qemu_put_virtqueue_element() and qemu_get_virtqueue_element() transfer
 * this structure as one raw buffer of sizeof(VirtQueueElementOld) bytes,
 * so its layout must not change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;   /* number of valid out_addr/out_sg entries */
    unsigned int in_num;    /* number of valid in_addr/in_sg entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    /* Only iov_len is meaningful in the stream; iov_base is saved as 0. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
1814 
1815 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1816 {
1817     VirtQueueElement *elem;
1818     VirtQueueElementOld data;
1819     int i;
1820 
1821     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1822 
1823     /* TODO: teach all callers that this can fail, and return failure instead
1824      * of asserting here.
1825      * This is just one thing (there are probably more) that must be
1826      * fixed before we can allow NDEBUG compilation.
1827      */
1828     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1829     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1830 
1831     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1832     elem->index = data.index;
1833 
1834     for (i = 0; i < elem->in_num; i++) {
1835         elem->in_addr[i] = data.in_addr[i];
1836     }
1837 
1838     for (i = 0; i < elem->out_num; i++) {
1839         elem->out_addr[i] = data.out_addr[i];
1840     }
1841 
1842     for (i = 0; i < elem->in_num; i++) {
1843         /* Base is overwritten by virtqueue_map.  */
1844         elem->in_sg[i].iov_base = 0;
1845         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1846     }
1847 
1848     for (i = 0; i < elem->out_num; i++) {
1849         /* Base is overwritten by virtqueue_map.  */
1850         elem->out_sg[i].iov_base = 0;
1851         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1852     }
1853 
1854     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1855         qemu_get_be32s(f, &elem->ndescs);
1856     }
1857 
1858     virtqueue_map(vdev, elem);
1859     return elem;
1860 }
1861 
1862 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1863                                 VirtQueueElement *elem)
1864 {
1865     VirtQueueElementOld data;
1866     int i;
1867 
1868     memset(&data, 0, sizeof(data));
1869     data.index = elem->index;
1870     data.in_num = elem->in_num;
1871     data.out_num = elem->out_num;
1872 
1873     for (i = 0; i < elem->in_num; i++) {
1874         data.in_addr[i] = elem->in_addr[i];
1875     }
1876 
1877     for (i = 0; i < elem->out_num; i++) {
1878         data.out_addr[i] = elem->out_addr[i];
1879     }
1880 
1881     for (i = 0; i < elem->in_num; i++) {
1882         /* Base is overwritten by virtqueue_map when loading.  Do not
1883          * save it, as it would leak the QEMU address space layout.  */
1884         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1885     }
1886 
1887     for (i = 0; i < elem->out_num; i++) {
1888         /* Do not save iov_base as above.  */
1889         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1890     }
1891 
1892     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1893         qemu_put_be32s(f, &elem->ndescs);
1894     }
1895 
1896     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897 }
1898 
1899 /* virtio device */
1900 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1901 {
1902     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1903     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1904 
1905     if (virtio_device_disabled(vdev)) {
1906         return;
1907     }
1908 
1909     if (k->notify) {
1910         k->notify(qbus->parent, vector);
1911     }
1912 }
1913 
1914 void virtio_update_irq(VirtIODevice *vdev)
1915 {
1916     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1917 }
1918 
1919 static int virtio_validate_features(VirtIODevice *vdev)
1920 {
1921     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1922 
1923     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1924         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1925         return -EFAULT;
1926     }
1927 
1928     if (k->validate_features) {
1929         return k->validate_features(vdev);
1930     } else {
1931         return 0;
1932     }
1933 }
1934 
1935 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1936 {
1937     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1938     trace_virtio_set_status(vdev, val);
1939 
1940     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1941         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1942             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1943             int ret = virtio_validate_features(vdev);
1944 
1945             if (ret) {
1946                 return ret;
1947             }
1948         }
1949     }
1950 
1951     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1952         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1953         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1954     }
1955 
1956     if (k->set_status) {
1957         k->set_status(vdev, val);
1958     }
1959     vdev->status = val;
1960 
1961     return 0;
1962 }
1963 
1964 static enum virtio_device_endian virtio_default_endian(void)
1965 {
1966     if (target_words_bigendian()) {
1967         return VIRTIO_DEVICE_ENDIAN_BIG;
1968     } else {
1969         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970     }
1971 }
1972 
1973 static enum virtio_device_endian virtio_current_cpu_endian(void)
1974 {
1975     if (cpu_virtio_is_big_endian(current_cpu)) {
1976         return VIRTIO_DEVICE_ENDIAN_BIG;
1977     } else {
1978         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1979     }
1980 }
1981 
1982 void virtio_reset(void *opaque)
1983 {
1984     VirtIODevice *vdev = opaque;
1985     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1986     int i;
1987 
1988     virtio_set_status(vdev, 0);
1989     if (current_cpu) {
1990         /* Guest initiated reset */
1991         vdev->device_endian = virtio_current_cpu_endian();
1992     } else {
1993         /* System reset */
1994         vdev->device_endian = virtio_default_endian();
1995     }
1996 
1997     if (k->reset) {
1998         k->reset(vdev);
1999     }
2000 
2001     vdev->start_on_kick = false;
2002     vdev->started = false;
2003     vdev->broken = false;
2004     vdev->guest_features = 0;
2005     vdev->queue_sel = 0;
2006     vdev->status = 0;
2007     vdev->disabled = false;
2008     qatomic_set(&vdev->isr, 0);
2009     vdev->config_vector = VIRTIO_NO_VECTOR;
2010     virtio_notify_vector(vdev, vdev->config_vector);
2011 
2012     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2013         vdev->vq[i].vring.desc = 0;
2014         vdev->vq[i].vring.avail = 0;
2015         vdev->vq[i].vring.used = 0;
2016         vdev->vq[i].last_avail_idx = 0;
2017         vdev->vq[i].shadow_avail_idx = 0;
2018         vdev->vq[i].used_idx = 0;
2019         vdev->vq[i].last_avail_wrap_counter = true;
2020         vdev->vq[i].shadow_avail_wrap_counter = true;
2021         vdev->vq[i].used_wrap_counter = true;
2022         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2023         vdev->vq[i].signalled_used = 0;
2024         vdev->vq[i].signalled_used_valid = false;
2025         vdev->vq[i].notification = true;
2026         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2027         vdev->vq[i].inuse = 0;
2028         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2029     }
2030 }
2031 
2032 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2033 {
2034     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2035     uint8_t val;
2036 
2037     if (addr + sizeof(val) > vdev->config_len) {
2038         return (uint32_t)-1;
2039     }
2040 
2041     k->get_config(vdev, vdev->config);
2042 
2043     val = ldub_p(vdev->config + addr);
2044     return val;
2045 }
2046 
2047 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2048 {
2049     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2050     uint16_t val;
2051 
2052     if (addr + sizeof(val) > vdev->config_len) {
2053         return (uint32_t)-1;
2054     }
2055 
2056     k->get_config(vdev, vdev->config);
2057 
2058     val = lduw_p(vdev->config + addr);
2059     return val;
2060 }
2061 
2062 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2063 {
2064     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2065     uint32_t val;
2066 
2067     if (addr + sizeof(val) > vdev->config_len) {
2068         return (uint32_t)-1;
2069     }
2070 
2071     k->get_config(vdev, vdev->config);
2072 
2073     val = ldl_p(vdev->config + addr);
2074     return val;
2075 }
2076 
2077 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2078 {
2079     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080     uint8_t val = data;
2081 
2082     if (addr + sizeof(val) > vdev->config_len) {
2083         return;
2084     }
2085 
2086     stb_p(vdev->config + addr, val);
2087 
2088     if (k->set_config) {
2089         k->set_config(vdev, vdev->config);
2090     }
2091 }
2092 
2093 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2094 {
2095     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2096     uint16_t val = data;
2097 
2098     if (addr + sizeof(val) > vdev->config_len) {
2099         return;
2100     }
2101 
2102     stw_p(vdev->config + addr, val);
2103 
2104     if (k->set_config) {
2105         k->set_config(vdev, vdev->config);
2106     }
2107 }
2108 
2109 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2110 {
2111     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2112     uint32_t val = data;
2113 
2114     if (addr + sizeof(val) > vdev->config_len) {
2115         return;
2116     }
2117 
2118     stl_p(vdev->config + addr, val);
2119 
2120     if (k->set_config) {
2121         k->set_config(vdev, vdev->config);
2122     }
2123 }
2124 
2125 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2126 {
2127     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2128     uint8_t val;
2129 
2130     if (addr + sizeof(val) > vdev->config_len) {
2131         return (uint32_t)-1;
2132     }
2133 
2134     k->get_config(vdev, vdev->config);
2135 
2136     val = ldub_p(vdev->config + addr);
2137     return val;
2138 }
2139 
2140 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2141 {
2142     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2143     uint16_t val;
2144 
2145     if (addr + sizeof(val) > vdev->config_len) {
2146         return (uint32_t)-1;
2147     }
2148 
2149     k->get_config(vdev, vdev->config);
2150 
2151     val = lduw_le_p(vdev->config + addr);
2152     return val;
2153 }
2154 
2155 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2156 {
2157     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2158     uint32_t val;
2159 
2160     if (addr + sizeof(val) > vdev->config_len) {
2161         return (uint32_t)-1;
2162     }
2163 
2164     k->get_config(vdev, vdev->config);
2165 
2166     val = ldl_le_p(vdev->config + addr);
2167     return val;
2168 }
2169 
2170 void virtio_config_modern_writeb(VirtIODevice *vdev,
2171                                  uint32_t addr, uint32_t data)
2172 {
2173     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2174     uint8_t val = data;
2175 
2176     if (addr + sizeof(val) > vdev->config_len) {
2177         return;
2178     }
2179 
2180     stb_p(vdev->config + addr, val);
2181 
2182     if (k->set_config) {
2183         k->set_config(vdev, vdev->config);
2184     }
2185 }
2186 
2187 void virtio_config_modern_writew(VirtIODevice *vdev,
2188                                  uint32_t addr, uint32_t data)
2189 {
2190     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2191     uint16_t val = data;
2192 
2193     if (addr + sizeof(val) > vdev->config_len) {
2194         return;
2195     }
2196 
2197     stw_le_p(vdev->config + addr, val);
2198 
2199     if (k->set_config) {
2200         k->set_config(vdev, vdev->config);
2201     }
2202 }
2203 
2204 void virtio_config_modern_writel(VirtIODevice *vdev,
2205                                  uint32_t addr, uint32_t data)
2206 {
2207     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2208     uint32_t val = data;
2209 
2210     if (addr + sizeof(val) > vdev->config_len) {
2211         return;
2212     }
2213 
2214     stl_le_p(vdev->config + addr, val);
2215 
2216     if (k->set_config) {
2217         k->set_config(vdev, vdev->config);
2218     }
2219 }
2220 
2221 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2222 {
2223     if (!vdev->vq[n].vring.num) {
2224         return;
2225     }
2226     vdev->vq[n].vring.desc = addr;
2227     virtio_queue_update_rings(vdev, n);
2228 }
2229 
2230 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2231 {
2232     return vdev->vq[n].vring.desc;
2233 }
2234 
2235 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2236                             hwaddr avail, hwaddr used)
2237 {
2238     if (!vdev->vq[n].vring.num) {
2239         return;
2240     }
2241     vdev->vq[n].vring.desc = desc;
2242     vdev->vq[n].vring.avail = avail;
2243     vdev->vq[n].vring.used = used;
2244     virtio_init_region_cache(vdev, n);
2245 }
2246 
2247 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2248 {
2249     /* Don't allow guest to flip queue between existent and
2250      * nonexistent states, or to set it to an invalid size.
2251      */
2252     if (!!num != !!vdev->vq[n].vring.num ||
2253         num > VIRTQUEUE_MAX_SIZE ||
2254         num < 0) {
2255         return;
2256     }
2257     vdev->vq[n].vring.num = num;
2258 }
2259 
2260 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2261 {
2262     return QLIST_FIRST(&vdev->vector_queues[vector]);
2263 }
2264 
2265 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2266 {
2267     return QLIST_NEXT(vq, node);
2268 }
2269 
2270 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2271 {
2272     return vdev->vq[n].vring.num;
2273 }
2274 
2275 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2276 {
2277     return vdev->vq[n].vring.num_default;
2278 }
2279 
2280 int virtio_get_num_queues(VirtIODevice *vdev)
2281 {
2282     int i;
2283 
2284     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2285         if (!virtio_queue_get_num(vdev, i)) {
2286             break;
2287         }
2288     }
2289 
2290     return i;
2291 }
2292 
2293 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2294 {
2295     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2296     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2297 
2298     /* virtio-1 compliant devices cannot change the alignment */
2299     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2300         error_report("tried to modify queue alignment for virtio-1 device");
2301         return;
2302     }
2303     /* Check that the transport told us it was going to do this
2304      * (so a buggy transport will immediately assert rather than
2305      * silently failing to migrate this state)
2306      */
2307     assert(k->has_variable_vring_alignment);
2308 
2309     if (align) {
2310         vdev->vq[n].vring.align = align;
2311         virtio_queue_update_rings(vdev, n);
2312     }
2313 }
2314 
2315 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2316 {
2317     bool ret = false;
2318 
2319     if (vq->vring.desc && vq->handle_aio_output) {
2320         VirtIODevice *vdev = vq->vdev;
2321 
2322         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2323         ret = vq->handle_aio_output(vdev, vq);
2324 
2325         if (unlikely(vdev->start_on_kick)) {
2326             virtio_set_started(vdev, true);
2327         }
2328     }
2329 
2330     return ret;
2331 }
2332 
2333 static void virtio_queue_notify_vq(VirtQueue *vq)
2334 {
2335     if (vq->vring.desc && vq->handle_output) {
2336         VirtIODevice *vdev = vq->vdev;
2337 
2338         if (unlikely(vdev->broken)) {
2339             return;
2340         }
2341 
2342         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2343         vq->handle_output(vdev, vq);
2344 
2345         if (unlikely(vdev->start_on_kick)) {
2346             virtio_set_started(vdev, true);
2347         }
2348     }
2349 }
2350 
2351 void virtio_queue_notify(VirtIODevice *vdev, int n)
2352 {
2353     VirtQueue *vq = &vdev->vq[n];
2354 
2355     if (unlikely(!vq->vring.desc || vdev->broken)) {
2356         return;
2357     }
2358 
2359     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2360     if (vq->host_notifier_enabled) {
2361         event_notifier_set(&vq->host_notifier);
2362     } else if (vq->handle_output) {
2363         vq->handle_output(vdev, vq);
2364 
2365         if (unlikely(vdev->start_on_kick)) {
2366             virtio_set_started(vdev, true);
2367         }
2368     }
2369 }
2370 
2371 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2372 {
2373     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2374         VIRTIO_NO_VECTOR;
2375 }
2376 
2377 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2378 {
2379     VirtQueue *vq = &vdev->vq[n];
2380 
2381     if (n < VIRTIO_QUEUE_MAX) {
2382         if (vdev->vector_queues &&
2383             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2384             QLIST_REMOVE(vq, node);
2385         }
2386         vdev->vq[n].vector = vector;
2387         if (vdev->vector_queues &&
2388             vector != VIRTIO_NO_VECTOR) {
2389             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2390         }
2391     }
2392 }
2393 
2394 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2395                             VirtIOHandleOutput handle_output)
2396 {
2397     int i;
2398 
2399     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2400         if (vdev->vq[i].vring.num == 0)
2401             break;
2402     }
2403 
2404     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2405         abort();
2406 
2407     vdev->vq[i].vring.num = queue_size;
2408     vdev->vq[i].vring.num_default = queue_size;
2409     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2410     vdev->vq[i].handle_output = handle_output;
2411     vdev->vq[i].handle_aio_output = NULL;
2412     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2413                                        queue_size);
2414 
2415     return &vdev->vq[i];
2416 }
2417 
2418 void virtio_delete_queue(VirtQueue *vq)
2419 {
2420     vq->vring.num = 0;
2421     vq->vring.num_default = 0;
2422     vq->handle_output = NULL;
2423     vq->handle_aio_output = NULL;
2424     g_free(vq->used_elems);
2425     vq->used_elems = NULL;
2426     virtio_virtqueue_reset_region_cache(vq);
2427 }
2428 
2429 void virtio_del_queue(VirtIODevice *vdev, int n)
2430 {
2431     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2432         abort();
2433     }
2434 
2435     virtio_delete_queue(&vdev->vq[n]);
2436 }
2437 
2438 static void virtio_set_isr(VirtIODevice *vdev, int value)
2439 {
2440     uint8_t old = qatomic_read(&vdev->isr);
2441 
2442     /* Do not write ISR if it does not change, so that its cacheline remains
2443      * shared in the common case where the guest does not read it.
2444      */
2445     if ((old & value) != value) {
2446         qatomic_or(&vdev->isr, value);
2447     }
2448 }
2449 
2450 /* Called within rcu_read_lock(). */
2451 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2452 {
2453     uint16_t old, new;
2454     bool v;
2455     /* We need to expose used array entries before checking used event. */
2456     smp_mb();
2457     /* Always notify when queue is empty (when feature acknowledge) */
2458     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2459         !vq->inuse && virtio_queue_empty(vq)) {
2460         return true;
2461     }
2462 
2463     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2464         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2465     }
2466 
2467     v = vq->signalled_used_valid;
2468     vq->signalled_used_valid = true;
2469     old = vq->signalled_used;
2470     new = vq->signalled_used = vq->used_idx;
2471     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2472 }
2473 
2474 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2475                                     uint16_t off_wrap, uint16_t new,
2476                                     uint16_t old)
2477 {
2478     int off = off_wrap & ~(1 << 15);
2479 
2480     if (wrap != off_wrap >> 15) {
2481         off -= vq->vring.num;
2482     }
2483 
2484     return vring_need_event(off, new, old);
2485 }
2486 
2487 /* Called within rcu_read_lock(). */
2488 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2489 {
2490     VRingPackedDescEvent e;
2491     uint16_t old, new;
2492     bool v;
2493     VRingMemoryRegionCaches *caches;
2494 
2495     caches = vring_get_region_caches(vq);
2496     if (!caches) {
2497         return false;
2498     }
2499 
2500     vring_packed_event_read(vdev, &caches->avail, &e);
2501 
2502     old = vq->signalled_used;
2503     new = vq->signalled_used = vq->used_idx;
2504     v = vq->signalled_used_valid;
2505     vq->signalled_used_valid = true;
2506 
2507     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2508         return false;
2509     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2510         return true;
2511     }
2512 
2513     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2514                                          e.off_wrap, new, old);
2515 }
2516 
2517 /* Called within rcu_read_lock().  */
2518 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2519 {
2520     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2521         return virtio_packed_should_notify(vdev, vq);
2522     } else {
2523         return virtio_split_should_notify(vdev, vq);
2524     }
2525 }
2526 
2527 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2528 {
2529     WITH_RCU_READ_LOCK_GUARD() {
2530         if (!virtio_should_notify(vdev, vq)) {
2531             return;
2532         }
2533     }
2534 
2535     trace_virtio_notify_irqfd(vdev, vq);
2536 
2537     /*
2538      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2539      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2540      * incorrectly polling this bit during crashdump and hibernation
2541      * in MSI mode, causing a hang if this bit is never updated.
2542      * Recent releases of Windows do not really shut down, but rather
2543      * log out and hibernate to make the next startup faster.  Hence,
2544      * this manifested as a more serious hang during shutdown with
2545      *
2546      * Next driver release from 2016 fixed this problem, so working around it
2547      * is not a must, but it's easy to do so let's do it here.
2548      *
2549      * Note: it's safe to update ISR from any thread as it was switched
2550      * to an atomic operation.
2551      */
2552     virtio_set_isr(vq->vdev, 0x1);
2553     event_notifier_set(&vq->guest_notifier);
2554 }
2555 
2556 static void virtio_irq(VirtQueue *vq)
2557 {
2558     virtio_set_isr(vq->vdev, 0x1);
2559     virtio_notify_vector(vq->vdev, vq->vector);
2560 }
2561 
2562 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2563 {
2564     WITH_RCU_READ_LOCK_GUARD() {
2565         if (!virtio_should_notify(vdev, vq)) {
2566             return;
2567         }
2568     }
2569 
2570     trace_virtio_notify(vdev, vq);
2571     virtio_irq(vq);
2572 }
2573 
2574 void virtio_notify_config(VirtIODevice *vdev)
2575 {
2576     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2577         return;
2578 
2579     virtio_set_isr(vdev, 0x3);
2580     vdev->generation++;
2581     virtio_notify_vector(vdev, vdev->config_vector);
2582 }
2583 
2584 static bool virtio_device_endian_needed(void *opaque)
2585 {
2586     VirtIODevice *vdev = opaque;
2587 
2588     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2589     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2590         return vdev->device_endian != virtio_default_endian();
2591     }
2592     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2593     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2594 }
2595 
2596 static bool virtio_64bit_features_needed(void *opaque)
2597 {
2598     VirtIODevice *vdev = opaque;
2599 
2600     return (vdev->host_features >> 32) != 0;
2601 }
2602 
2603 static bool virtio_virtqueue_needed(void *opaque)
2604 {
2605     VirtIODevice *vdev = opaque;
2606 
2607     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2608 }
2609 
2610 static bool virtio_packed_virtqueue_needed(void *opaque)
2611 {
2612     VirtIODevice *vdev = opaque;
2613 
2614     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2615 }
2616 
2617 static bool virtio_ringsize_needed(void *opaque)
2618 {
2619     VirtIODevice *vdev = opaque;
2620     int i;
2621 
2622     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2623         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2624             return true;
2625         }
2626     }
2627     return false;
2628 }
2629 
2630 static bool virtio_extra_state_needed(void *opaque)
2631 {
2632     VirtIODevice *vdev = opaque;
2633     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2634     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2635 
2636     return k->has_extra_state &&
2637         k->has_extra_state(qbus->parent);
2638 }
2639 
2640 static bool virtio_broken_needed(void *opaque)
2641 {
2642     VirtIODevice *vdev = opaque;
2643 
2644     return vdev->broken;
2645 }
2646 
2647 static bool virtio_started_needed(void *opaque)
2648 {
2649     VirtIODevice *vdev = opaque;
2650 
2651     return vdev->started;
2652 }
2653 
2654 static bool virtio_disabled_needed(void *opaque)
2655 {
2656     VirtIODevice *vdev = opaque;
2657 
2658     return vdev->disabled;
2659 }
2660 
2661 static const VMStateDescription vmstate_virtqueue = {
2662     .name = "virtqueue_state",
2663     .version_id = 1,
2664     .minimum_version_id = 1,
2665     .fields = (VMStateField[]) {
2666         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2667         VMSTATE_UINT64(vring.used, struct VirtQueue),
2668         VMSTATE_END_OF_LIST()
2669     }
2670 };
2671 
2672 static const VMStateDescription vmstate_packed_virtqueue = {
2673     .name = "packed_virtqueue_state",
2674     .version_id = 1,
2675     .minimum_version_id = 1,
2676     .fields = (VMStateField[]) {
2677         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2678         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2679         VMSTATE_UINT16(used_idx, struct VirtQueue),
2680         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2681         VMSTATE_UINT32(inuse, struct VirtQueue),
2682         VMSTATE_END_OF_LIST()
2683     }
2684 };
2685 
2686 static const VMStateDescription vmstate_virtio_virtqueues = {
2687     .name = "virtio/virtqueues",
2688     .version_id = 1,
2689     .minimum_version_id = 1,
2690     .needed = &virtio_virtqueue_needed,
2691     .fields = (VMStateField[]) {
2692         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2693                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2694         VMSTATE_END_OF_LIST()
2695     }
2696 };
2697 
2698 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2699     .name = "virtio/packed_virtqueues",
2700     .version_id = 1,
2701     .minimum_version_id = 1,
2702     .needed = &virtio_packed_virtqueue_needed,
2703     .fields = (VMStateField[]) {
2704         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2705                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2706         VMSTATE_END_OF_LIST()
2707     }
2708 };
2709 
2710 static const VMStateDescription vmstate_ringsize = {
2711     .name = "ringsize_state",
2712     .version_id = 1,
2713     .minimum_version_id = 1,
2714     .fields = (VMStateField[]) {
2715         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2716         VMSTATE_END_OF_LIST()
2717     }
2718 };
2719 
2720 static const VMStateDescription vmstate_virtio_ringsize = {
2721     .name = "virtio/ringsize",
2722     .version_id = 1,
2723     .minimum_version_id = 1,
2724     .needed = &virtio_ringsize_needed,
2725     .fields = (VMStateField[]) {
2726         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2727                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2728         VMSTATE_END_OF_LIST()
2729     }
2730 };
2731 
2732 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2733                            const VMStateField *field)
2734 {
2735     VirtIODevice *vdev = pv;
2736     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2737     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2738 
2739     if (!k->load_extra_state) {
2740         return -1;
2741     } else {
2742         return k->load_extra_state(qbus->parent, f);
2743     }
2744 }
2745 
2746 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2747                            const VMStateField *field, JSONWriter *vmdesc)
2748 {
2749     VirtIODevice *vdev = pv;
2750     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2751     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2752 
2753     k->save_extra_state(qbus->parent, f);
2754     return 0;
2755 }
2756 
2757 static const VMStateInfo vmstate_info_extra_state = {
2758     .name = "virtqueue_extra_state",
2759     .get = get_extra_state,
2760     .put = put_extra_state,
2761 };
2762 
2763 static const VMStateDescription vmstate_virtio_extra_state = {
2764     .name = "virtio/extra_state",
2765     .version_id = 1,
2766     .minimum_version_id = 1,
2767     .needed = &virtio_extra_state_needed,
2768     .fields = (VMStateField[]) {
2769         {
2770             .name         = "extra_state",
2771             .version_id   = 0,
2772             .field_exists = NULL,
2773             .size         = 0,
2774             .info         = &vmstate_info_extra_state,
2775             .flags        = VMS_SINGLE,
2776             .offset       = 0,
2777         },
2778         VMSTATE_END_OF_LIST()
2779     }
2780 };
2781 
2782 static const VMStateDescription vmstate_virtio_device_endian = {
2783     .name = "virtio/device_endian",
2784     .version_id = 1,
2785     .minimum_version_id = 1,
2786     .needed = &virtio_device_endian_needed,
2787     .fields = (VMStateField[]) {
2788         VMSTATE_UINT8(device_endian, VirtIODevice),
2789         VMSTATE_END_OF_LIST()
2790     }
2791 };
2792 
2793 static const VMStateDescription vmstate_virtio_64bit_features = {
2794     .name = "virtio/64bit_features",
2795     .version_id = 1,
2796     .minimum_version_id = 1,
2797     .needed = &virtio_64bit_features_needed,
2798     .fields = (VMStateField[]) {
2799         VMSTATE_UINT64(guest_features, VirtIODevice),
2800         VMSTATE_END_OF_LIST()
2801     }
2802 };
2803 
2804 static const VMStateDescription vmstate_virtio_broken = {
2805     .name = "virtio/broken",
2806     .version_id = 1,
2807     .minimum_version_id = 1,
2808     .needed = &virtio_broken_needed,
2809     .fields = (VMStateField[]) {
2810         VMSTATE_BOOL(broken, VirtIODevice),
2811         VMSTATE_END_OF_LIST()
2812     }
2813 };
2814 
2815 static const VMStateDescription vmstate_virtio_started = {
2816     .name = "virtio/started",
2817     .version_id = 1,
2818     .minimum_version_id = 1,
2819     .needed = &virtio_started_needed,
2820     .fields = (VMStateField[]) {
2821         VMSTATE_BOOL(started, VirtIODevice),
2822         VMSTATE_END_OF_LIST()
2823     }
2824 };
2825 
2826 static const VMStateDescription vmstate_virtio_disabled = {
2827     .name = "virtio/disabled",
2828     .version_id = 1,
2829     .minimum_version_id = 1,
2830     .needed = &virtio_disabled_needed,
2831     .fields = (VMStateField[]) {
2832         VMSTATE_BOOL(disabled, VirtIODevice),
2833         VMSTATE_END_OF_LIST()
2834     }
2835 };
2836 
2837 static const VMStateDescription vmstate_virtio = {
2838     .name = "virtio",
2839     .version_id = 1,
2840     .minimum_version_id = 1,
2841     .minimum_version_id_old = 1,
2842     .fields = (VMStateField[]) {
2843         VMSTATE_END_OF_LIST()
2844     },
2845     .subsections = (const VMStateDescription*[]) {
2846         &vmstate_virtio_device_endian,
2847         &vmstate_virtio_64bit_features,
2848         &vmstate_virtio_virtqueues,
2849         &vmstate_virtio_ringsize,
2850         &vmstate_virtio_broken,
2851         &vmstate_virtio_extra_state,
2852         &vmstate_virtio_started,
2853         &vmstate_virtio_packed_virtqueues,
2854         &vmstate_virtio_disabled,
2855         NULL
2856     }
2857 };
2858 
2859 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2860 {
2861     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2862     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2863     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2864     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2865     int i;
2866 
2867     if (k->save_config) {
2868         k->save_config(qbus->parent, f);
2869     }
2870 
2871     qemu_put_8s(f, &vdev->status);
2872     qemu_put_8s(f, &vdev->isr);
2873     qemu_put_be16s(f, &vdev->queue_sel);
2874     qemu_put_be32s(f, &guest_features_lo);
2875     qemu_put_be32(f, vdev->config_len);
2876     qemu_put_buffer(f, vdev->config, vdev->config_len);
2877 
2878     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2879         if (vdev->vq[i].vring.num == 0)
2880             break;
2881     }
2882 
2883     qemu_put_be32(f, i);
2884 
2885     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2886         if (vdev->vq[i].vring.num == 0)
2887             break;
2888 
2889         qemu_put_be32(f, vdev->vq[i].vring.num);
2890         if (k->has_variable_vring_alignment) {
2891             qemu_put_be32(f, vdev->vq[i].vring.align);
2892         }
2893         /*
2894          * Save desc now, the rest of the ring addresses are saved in
2895          * subsections for VIRTIO-1 devices.
2896          */
2897         qemu_put_be64(f, vdev->vq[i].vring.desc);
2898         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2899         if (k->save_queue) {
2900             k->save_queue(qbus->parent, i, f);
2901         }
2902     }
2903 
2904     if (vdc->save != NULL) {
2905         vdc->save(vdev, f);
2906     }
2907 
2908     if (vdc->vmsd) {
2909         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2910         if (ret) {
2911             return ret;
2912         }
2913     }
2914 
2915     /* Subsections */
2916     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2917 }
2918 
2919 /* A wrapper for use as a VMState .put function */
2920 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2921                               const VMStateField *field, JSONWriter *vmdesc)
2922 {
2923     return virtio_save(VIRTIO_DEVICE(opaque), f);
2924 }
2925 
2926 /* A wrapper for use as a VMState .get function */
2927 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2928                              const VMStateField *field)
2929 {
2930     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2931     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2932 
2933     return virtio_load(vdev, f, dc->vmsd->version_id);
2934 }
2935 
2936 const VMStateInfo  virtio_vmstate_info = {
2937     .name = "virtio",
2938     .get = virtio_device_get,
2939     .put = virtio_device_put,
2940 };
2941 
2942 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2943 {
2944     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2945     bool bad = (val & ~(vdev->host_features)) != 0;
2946 
2947     val &= vdev->host_features;
2948     if (k->set_features) {
2949         k->set_features(vdev, val);
2950     }
2951     vdev->guest_features = val;
2952     return bad ? -1 : 0;
2953 }
2954 
2955 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2956 {
2957     int ret;
2958     /*
2959      * The driver must not attempt to set features after feature negotiation
2960      * has finished.
2961      */
2962     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2963         return -EINVAL;
2964     }
2965     ret = virtio_set_features_nocheck(vdev, val);
2966     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2967         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2968         int i;
2969         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2970             if (vdev->vq[i].vring.num != 0) {
2971                 virtio_init_region_cache(vdev, i);
2972             }
2973         }
2974     }
2975     if (!ret) {
2976         if (!virtio_device_started(vdev, vdev->status) &&
2977             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2978             vdev->start_on_kick = true;
2979         }
2980     }
2981     return ret;
2982 }
2983 
2984 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2985                                       uint64_t host_features)
2986 {
2987     size_t config_size = 0;
2988     int i;
2989 
2990     for (i = 0; feature_sizes[i].flags != 0; i++) {
2991         if (host_features & feature_sizes[i].flags) {
2992             config_size = MAX(feature_sizes[i].end, config_size);
2993         }
2994     }
2995 
2996     return config_size;
2997 }
2998 
2999 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3000 {
3001     int i, ret;
3002     int32_t config_len;
3003     uint32_t num;
3004     uint32_t features;
3005     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3006     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3007     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3008 
3009     /*
3010      * We poison the endianness to ensure it does not get used before
3011      * subsections have been loaded.
3012      */
3013     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3014 
3015     if (k->load_config) {
3016         ret = k->load_config(qbus->parent, f);
3017         if (ret)
3018             return ret;
3019     }
3020 
3021     qemu_get_8s(f, &vdev->status);
3022     qemu_get_8s(f, &vdev->isr);
3023     qemu_get_be16s(f, &vdev->queue_sel);
3024     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3025         return -1;
3026     }
3027     qemu_get_be32s(f, &features);
3028 
3029     /*
3030      * Temporarily set guest_features low bits - needed by
3031      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3032      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3033      *
3034      * Note: devices should always test host features in future - don't create
3035      * new dependencies like this.
3036      */
3037     vdev->guest_features = features;
3038 
3039     config_len = qemu_get_be32(f);
3040 
3041     /*
3042      * There are cases where the incoming config can be bigger or smaller
3043      * than what we have; so load what we have space for, and skip
3044      * any excess that's in the stream.
3045      */
3046     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3047 
3048     while (config_len > vdev->config_len) {
3049         qemu_get_byte(f);
3050         config_len--;
3051     }
3052 
3053     num = qemu_get_be32(f);
3054 
3055     if (num > VIRTIO_QUEUE_MAX) {
3056         error_report("Invalid number of virtqueues: 0x%x", num);
3057         return -1;
3058     }
3059 
3060     for (i = 0; i < num; i++) {
3061         vdev->vq[i].vring.num = qemu_get_be32(f);
3062         if (k->has_variable_vring_alignment) {
3063             vdev->vq[i].vring.align = qemu_get_be32(f);
3064         }
3065         vdev->vq[i].vring.desc = qemu_get_be64(f);
3066         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3067         vdev->vq[i].signalled_used_valid = false;
3068         vdev->vq[i].notification = true;
3069 
3070         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3071             error_report("VQ %d address 0x0 "
3072                          "inconsistent with Host index 0x%x",
3073                          i, vdev->vq[i].last_avail_idx);
3074             return -1;
3075         }
3076         if (k->load_queue) {
3077             ret = k->load_queue(qbus->parent, i, f);
3078             if (ret)
3079                 return ret;
3080         }
3081     }
3082 
3083     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3084 
3085     if (vdc->load != NULL) {
3086         ret = vdc->load(vdev, f, version_id);
3087         if (ret) {
3088             return ret;
3089         }
3090     }
3091 
3092     if (vdc->vmsd) {
3093         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3094         if (ret) {
3095             return ret;
3096         }
3097     }
3098 
3099     /* Subsections */
3100     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3101     if (ret) {
3102         return ret;
3103     }
3104 
3105     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3106         vdev->device_endian = virtio_default_endian();
3107     }
3108 
3109     if (virtio_64bit_features_needed(vdev)) {
3110         /*
3111          * Subsection load filled vdev->guest_features.  Run them
3112          * through virtio_set_features to sanity-check them against
3113          * host_features.
3114          */
3115         uint64_t features64 = vdev->guest_features;
3116         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3117             error_report("Features 0x%" PRIx64 " unsupported. "
3118                          "Allowed features: 0x%" PRIx64,
3119                          features64, vdev->host_features);
3120             return -1;
3121         }
3122     } else {
3123         if (virtio_set_features_nocheck(vdev, features) < 0) {
3124             error_report("Features 0x%x unsupported. "
3125                          "Allowed features: 0x%" PRIx64,
3126                          features, vdev->host_features);
3127             return -1;
3128         }
3129     }
3130 
3131     if (!virtio_device_started(vdev, vdev->status) &&
3132         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3133         vdev->start_on_kick = true;
3134     }
3135 
3136     RCU_READ_LOCK_GUARD();
3137     for (i = 0; i < num; i++) {
3138         if (vdev->vq[i].vring.desc) {
3139             uint16_t nheads;
3140 
3141             /*
3142              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3143              * only the region cache needs to be set up.  Legacy devices need
3144              * to calculate used and avail ring addresses based on the desc
3145              * address.
3146              */
3147             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3148                 virtio_init_region_cache(vdev, i);
3149             } else {
3150                 virtio_queue_update_rings(vdev, i);
3151             }
3152 
3153             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3154                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3155                 vdev->vq[i].shadow_avail_wrap_counter =
3156                                         vdev->vq[i].last_avail_wrap_counter;
3157                 continue;
3158             }
3159 
3160             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3161             /* Check it isn't doing strange things with descriptor numbers. */
3162             if (nheads > vdev->vq[i].vring.num) {
3163                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3164                              "inconsistent with Host index 0x%x: delta 0x%x",
3165                              i, vdev->vq[i].vring.num,
3166                              vring_avail_idx(&vdev->vq[i]),
3167                              vdev->vq[i].last_avail_idx, nheads);
3168                 vdev->vq[i].used_idx = 0;
3169                 vdev->vq[i].shadow_avail_idx = 0;
3170                 vdev->vq[i].inuse = 0;
3171                 continue;
3172             }
3173             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3174             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3175 
3176             /*
3177              * Some devices migrate VirtQueueElements that have been popped
3178              * from the avail ring but not yet returned to the used ring.
3179              * Since max ring size < UINT16_MAX it's safe to use modulo
3180              * UINT16_MAX + 1 subtraction.
3181              */
3182             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3183                                 vdev->vq[i].used_idx);
3184             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3185                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3186                              "used_idx 0x%x",
3187                              i, vdev->vq[i].vring.num,
3188                              vdev->vq[i].last_avail_idx,
3189                              vdev->vq[i].used_idx);
3190                 return -1;
3191             }
3192         }
3193     }
3194 
3195     if (vdc->post_load) {
3196         ret = vdc->post_load(vdev);
3197         if (ret) {
3198             return ret;
3199         }
3200     }
3201 
3202     return 0;
3203 }
3204 
3205 void virtio_cleanup(VirtIODevice *vdev)
3206 {
3207     qemu_del_vm_change_state_handler(vdev->vmstate);
3208 }
3209 
3210 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3211 {
3212     VirtIODevice *vdev = opaque;
3213     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3214     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3215     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3216     vdev->vm_running = running;
3217 
3218     if (backend_run) {
3219         virtio_set_status(vdev, vdev->status);
3220     }
3221 
3222     if (k->vmstate_change) {
3223         k->vmstate_change(qbus->parent, backend_run);
3224     }
3225 
3226     if (!backend_run) {
3227         virtio_set_status(vdev, vdev->status);
3228     }
3229 }
3230 
3231 void virtio_instance_init_common(Object *proxy_obj, void *data,
3232                                  size_t vdev_size, const char *vdev_name)
3233 {
3234     DeviceState *vdev = data;
3235 
3236     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3237                                        vdev_size, vdev_name, &error_abort,
3238                                        NULL);
3239     qdev_alias_all_properties(vdev, proxy_obj);
3240 }
3241 
3242 void virtio_init(VirtIODevice *vdev, const char *name,
3243                  uint16_t device_id, size_t config_size)
3244 {
3245     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3246     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3247     int i;
3248     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3249 
3250     if (nvectors) {
3251         vdev->vector_queues =
3252             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3253     }
3254 
3255     vdev->start_on_kick = false;
3256     vdev->started = false;
3257     vdev->device_id = device_id;
3258     vdev->status = 0;
3259     qatomic_set(&vdev->isr, 0);
3260     vdev->queue_sel = 0;
3261     vdev->config_vector = VIRTIO_NO_VECTOR;
3262     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3263     vdev->vm_running = runstate_is_running();
3264     vdev->broken = false;
3265     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3266         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3267         vdev->vq[i].vdev = vdev;
3268         vdev->vq[i].queue_index = i;
3269         vdev->vq[i].host_notifier_enabled = false;
3270     }
3271 
3272     vdev->name = name;
3273     vdev->config_len = config_size;
3274     if (vdev->config_len) {
3275         vdev->config = g_malloc0(config_size);
3276     } else {
3277         vdev->config = NULL;
3278     }
3279     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3280             virtio_vmstate_change, vdev);
3281     vdev->device_endian = virtio_default_endian();
3282     vdev->use_guest_notifier_mask = true;
3283 }
3284 
3285 /*
3286  * Only devices that have already been around prior to defining the virtio
3287  * standard support legacy mode; this includes devices not specified in the
3288  * standard. All newer devices conform to the virtio standard only.
3289  */
3290 bool virtio_legacy_allowed(VirtIODevice *vdev)
3291 {
3292     switch (vdev->device_id) {
3293     case VIRTIO_ID_NET:
3294     case VIRTIO_ID_BLOCK:
3295     case VIRTIO_ID_CONSOLE:
3296     case VIRTIO_ID_RNG:
3297     case VIRTIO_ID_BALLOON:
3298     case VIRTIO_ID_RPMSG:
3299     case VIRTIO_ID_SCSI:
3300     case VIRTIO_ID_9P:
3301     case VIRTIO_ID_RPROC_SERIAL:
3302     case VIRTIO_ID_CAIF:
3303         return true;
3304     default:
3305         return false;
3306     }
3307 }
3308 
3309 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3310 {
3311     return vdev->disable_legacy_check;
3312 }
3313 
3314 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3315 {
3316     return vdev->vq[n].vring.desc;
3317 }
3318 
3319 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3320 {
3321     return virtio_queue_get_desc_addr(vdev, n) != 0;
3322 }
3323 
3324 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3325 {
3326     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3327     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3328 
3329     if (k->queue_enabled) {
3330         return k->queue_enabled(qbus->parent, n);
3331     }
3332     return virtio_queue_enabled_legacy(vdev, n);
3333 }
3334 
3335 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3336 {
3337     return vdev->vq[n].vring.avail;
3338 }
3339 
3340 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3341 {
3342     return vdev->vq[n].vring.used;
3343 }
3344 
3345 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3346 {
3347     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3348 }
3349 
3350 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3351 {
3352     int s;
3353 
3354     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3355         return sizeof(struct VRingPackedDescEvent);
3356     }
3357 
3358     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3359     return offsetof(VRingAvail, ring) +
3360         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3361 }
3362 
3363 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3364 {
3365     int s;
3366 
3367     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3368         return sizeof(struct VRingPackedDescEvent);
3369     }
3370 
3371     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3372     return offsetof(VRingUsed, ring) +
3373         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3374 }
3375 
3376 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3377                                                            int n)
3378 {
3379     unsigned int avail, used;
3380 
3381     avail = vdev->vq[n].last_avail_idx;
3382     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3383 
3384     used = vdev->vq[n].used_idx;
3385     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3386 
3387     return avail | used << 16;
3388 }
3389 
3390 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3391                                                       int n)
3392 {
3393     return vdev->vq[n].last_avail_idx;
3394 }
3395 
3396 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3397 {
3398     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3399         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3400     } else {
3401         return virtio_queue_split_get_last_avail_idx(vdev, n);
3402     }
3403 }
3404 
3405 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3406                                                    int n, unsigned int idx)
3407 {
3408     struct VirtQueue *vq = &vdev->vq[n];
3409 
3410     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3411     vq->last_avail_wrap_counter =
3412         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3413     idx >>= 16;
3414     vq->used_idx = idx & 0x7ffff;
3415     vq->used_wrap_counter = !!(idx & 0x8000);
3416 }
3417 
3418 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3419                                                   int n, unsigned int idx)
3420 {
3421         vdev->vq[n].last_avail_idx = idx;
3422         vdev->vq[n].shadow_avail_idx = idx;
3423 }
3424 
3425 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3426                                      unsigned int idx)
3427 {
3428     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3429         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3430     } else {
3431         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3432     }
3433 }
3434 
3435 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3436                                                        int n)
3437 {
3438     /* We don't have a reference like avail idx in shared memory */
3439     return;
3440 }
3441 
3442 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3443                                                       int n)
3444 {
3445     RCU_READ_LOCK_GUARD();
3446     if (vdev->vq[n].vring.desc) {
3447         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3448         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3449     }
3450 }
3451 
3452 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3453 {
3454     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3455         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3456     } else {
3457         virtio_queue_split_restore_last_avail_idx(vdev, n);
3458     }
3459 }
3460 
3461 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3462 {
3463     /* used idx was updated through set_last_avail_idx() */
3464     return;
3465 }
3466 
3467 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3468 {
3469     RCU_READ_LOCK_GUARD();
3470     if (vdev->vq[n].vring.desc) {
3471         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3472     }
3473 }
3474 
3475 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3476 {
3477     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3478         return virtio_queue_packed_update_used_idx(vdev, n);
3479     } else {
3480         return virtio_split_packed_update_used_idx(vdev, n);
3481     }
3482 }
3483 
3484 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3485 {
3486     vdev->vq[n].signalled_used_valid = false;
3487 }
3488 
3489 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3490 {
3491     return vdev->vq + n;
3492 }
3493 
3494 uint16_t virtio_get_queue_index(VirtQueue *vq)
3495 {
3496     return vq->queue_index;
3497 }
3498 
3499 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3500 {
3501     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3502     if (event_notifier_test_and_clear(n)) {
3503         virtio_irq(vq);
3504     }
3505 }
3506 
3507 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3508                                                 bool with_irqfd)
3509 {
3510     if (assign && !with_irqfd) {
3511         event_notifier_set_handler(&vq->guest_notifier,
3512                                    virtio_queue_guest_notifier_read);
3513     } else {
3514         event_notifier_set_handler(&vq->guest_notifier, NULL);
3515     }
3516     if (!assign) {
3517         /* Test and clear notifier before closing it,
3518          * in case poll callback didn't have time to run. */
3519         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3520     }
3521 }
3522 
3523 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3524 {
3525     return &vq->guest_notifier;
3526 }
3527 
3528 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3529 {
3530     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3531     if (event_notifier_test_and_clear(n)) {
3532         virtio_queue_notify_aio_vq(vq);
3533     }
3534 }
3535 
3536 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3537 {
3538     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3539 
3540     virtio_queue_set_notification(vq, 0);
3541 }
3542 
3543 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3544 {
3545     EventNotifier *n = opaque;
3546     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3547 
3548     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3549         return false;
3550     }
3551 
3552     return virtio_queue_notify_aio_vq(vq);
3553 }
3554 
3555 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3556 {
3557     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3558 
3559     /* Caller polls once more after this to catch requests that race with us */
3560     virtio_queue_set_notification(vq, 1);
3561 }
3562 
3563 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3564                                                 VirtIOHandleAIOOutput handle_output)
3565 {
3566     if (handle_output) {
3567         vq->handle_aio_output = handle_output;
3568         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3569                                virtio_queue_host_notifier_aio_read,
3570                                virtio_queue_host_notifier_aio_poll);
3571         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3572                                     virtio_queue_host_notifier_aio_poll_begin,
3573                                     virtio_queue_host_notifier_aio_poll_end);
3574     } else {
3575         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3576         /* Test and clear notifier before after disabling event,
3577          * in case poll callback didn't have time to run. */
3578         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3579         vq->handle_aio_output = NULL;
3580     }
3581 }
3582 
3583 void virtio_queue_host_notifier_read(EventNotifier *n)
3584 {
3585     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3586     if (event_notifier_test_and_clear(n)) {
3587         virtio_queue_notify_vq(vq);
3588     }
3589 }
3590 
3591 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3592 {
3593     return &vq->host_notifier;
3594 }
3595 
3596 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3597 {
3598     vq->host_notifier_enabled = enabled;
3599 }
3600 
3601 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3602                                       MemoryRegion *mr, bool assign)
3603 {
3604     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3605     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3606 
3607     if (k->set_host_notifier_mr) {
3608         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3609     }
3610 
3611     return -1;
3612 }
3613 
3614 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3615 {
3616     g_free(vdev->bus_name);
3617     vdev->bus_name = g_strdup(bus_name);
3618 }
3619 
3620 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3621 {
3622     va_list ap;
3623 
3624     va_start(ap, fmt);
3625     error_vreport(fmt, ap);
3626     va_end(ap);
3627 
3628     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3629         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3630         virtio_notify_config(vdev);
3631     }
3632 
3633     vdev->broken = true;
3634 }
3635 
3636 static void virtio_memory_listener_commit(MemoryListener *listener)
3637 {
3638     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3639     int i;
3640 
3641     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3642         if (vdev->vq[i].vring.num == 0) {
3643             break;
3644         }
3645         virtio_init_region_cache(vdev, i);
3646     }
3647 }
3648 
3649 static void virtio_device_realize(DeviceState *dev, Error **errp)
3650 {
3651     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3652     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3653     Error *err = NULL;
3654 
3655     /* Devices should either use vmsd or the load/save methods */
3656     assert(!vdc->vmsd || !vdc->load);
3657 
3658     if (vdc->realize != NULL) {
3659         vdc->realize(dev, &err);
3660         if (err != NULL) {
3661             error_propagate(errp, err);
3662             return;
3663         }
3664     }
3665 
3666     virtio_bus_device_plugged(vdev, &err);
3667     if (err != NULL) {
3668         error_propagate(errp, err);
3669         vdc->unrealize(dev);
3670         return;
3671     }
3672 
3673     vdev->listener.commit = virtio_memory_listener_commit;
3674     memory_listener_register(&vdev->listener, vdev->dma_as);
3675 }
3676 
3677 static void virtio_device_unrealize(DeviceState *dev)
3678 {
3679     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3680     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3681 
3682     memory_listener_unregister(&vdev->listener);
3683     virtio_bus_device_unplugged(vdev);
3684 
3685     if (vdc->unrealize != NULL) {
3686         vdc->unrealize(dev);
3687     }
3688 
3689     g_free(vdev->bus_name);
3690     vdev->bus_name = NULL;
3691 }
3692 
3693 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3694 {
3695     int i;
3696     if (!vdev->vq) {
3697         return;
3698     }
3699 
3700     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3701         if (vdev->vq[i].vring.num == 0) {
3702             break;
3703         }
3704         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3705     }
3706     g_free(vdev->vq);
3707 }
3708 
3709 static void virtio_device_instance_finalize(Object *obj)
3710 {
3711     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3712 
3713     virtio_device_free_virtqueues(vdev);
3714 
3715     g_free(vdev->config);
3716     g_free(vdev->vector_queues);
3717 }
3718 
3719 static Property virtio_properties[] = {
3720     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3721     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3722     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3723     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3724                      disable_legacy_check, false),
3725     DEFINE_PROP_END_OF_LIST(),
3726 };
3727 
/*
 * Default start_ioeventfd implementation.
 *
 * Assigns a host notifier to every queue with a non-zero size, installs
 * virtio_queue_host_notifier_read() as its handler, and then sets each
 * such notifier once so requests already in the vring get processed.
 *
 * Returns 0 on success.  If assigning a notifier fails, the notifiers
 * assigned so far are rolled back and the negative error code from
 * virtio_bus_set_host_notifier() is returned.
 */
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            /* err is only ever read on the assign_error path below. */
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    /* Queue n itself failed, so only queues below n need undoing. */
    i = n; /* save n for a second iteration after transaction is committed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    /* Second pass over the same queues, now that the commit is done. */
    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}
3789 
3790 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3791 {
3792     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3793     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3794 
3795     return virtio_bus_start_ioeventfd(vbus);
3796 }
3797 
3798 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3799 {
3800     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3801     int n, r;
3802 
3803     /*
3804      * Batch all the host notifiers in a single transaction to avoid
3805      * quadratic time complexity in address_space_update_ioeventfds().
3806      */
3807     memory_region_transaction_begin();
3808     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3809         VirtQueue *vq = &vdev->vq[n];
3810 
3811         if (!virtio_queue_get_num(vdev, n)) {
3812             continue;
3813         }
3814         event_notifier_set_handler(&vq->host_notifier, NULL);
3815         r = virtio_bus_set_host_notifier(qbus, n, false);
3816         assert(r >= 0);
3817     }
3818     /*
3819      * The transaction expects the ioeventfds to be open when it
3820      * commits. Do it now, before the cleanup loop.
3821      */
3822     memory_region_transaction_commit();
3823 
3824     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3825         if (!virtio_queue_get_num(vdev, n)) {
3826             continue;
3827         }
3828         virtio_bus_cleanup_host_notifier(qbus, n);
3829     }
3830 }
3831 
3832 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3833 {
3834     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3835     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3836 
3837     return virtio_bus_grab_ioeventfd(vbus);
3838 }
3839 
3840 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3841 {
3842     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3843     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3844 
3845     virtio_bus_release_ioeventfd(vbus);
3846 }
3847 
3848 static void virtio_device_class_init(ObjectClass *klass, void *data)
3849 {
3850     /* Set the default value here. */
3851     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3852     DeviceClass *dc = DEVICE_CLASS(klass);
3853 
3854     dc->realize = virtio_device_realize;
3855     dc->unrealize = virtio_device_unrealize;
3856     dc->bus_type = TYPE_VIRTIO_BUS;
3857     device_class_set_props(dc, virtio_properties);
3858     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3859     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3860 
3861     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3862 }
3863 
3864 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3865 {
3866     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3867     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3868 
3869     return virtio_bus_ioeventfd_enabled(vbus);
3870 }
3871 
3872 static const TypeInfo virtio_device_info = {
3873     .name = TYPE_VIRTIO_DEVICE,
3874     .parent = TYPE_DEVICE,
3875     .instance_size = sizeof(VirtIODevice),
3876     .class_init = virtio_device_class_init,
3877     .instance_finalize = virtio_device_instance_finalize,
3878     .abstract = true,
3879     .class_size = sizeof(VirtioDeviceClass),
3880 };
3881 
3882 static void virtio_register_types(void)
3883 {
3884     type_register_static(&virtio_device_info);
3885 }
3886 
3887 type_init(virtio_register_types)
3888