xref: /openbmc/qemu/hw/virtio/virtio.c (revision dbd9e084)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
32 /*
33  * The alignment to use between consumer and producer parts of vring.
34  * x86 pagesize again. This is the default, used by transports like PCI
35  * which don't provide a means for the guest to tell the host the alignment.
36  */
37 #define VIRTIO_PCI_VRING_ALIGN         4096
38 
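/*
 * Guest-visible vring structures (split and packed layouts).  Fields are
 * stored in guest endianness and byteswapped on access through the
 * virtio_tswap*() / virtio_*_phys_cached() helpers below.
 */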
39 typedef struct VRingDesc
40 {
41     uint64_t addr;
42     uint32_t len;
43     uint16_t flags;
44     uint16_t next;
45 } VRingDesc;
46 
47 typedef struct VRingPackedDesc {
48     uint64_t addr;
49     uint32_t len;
50     uint16_t id;
51     uint16_t flags;
52 } VRingPackedDesc;
53 
54 typedef struct VRingAvail
55 {
56     uint16_t flags;
57     uint16_t idx;
58     uint16_t ring[];
59 } VRingAvail;
60 
61 typedef struct VRingUsedElem
62 {
63     uint32_t id;
64     uint32_t len;
65 } VRingUsedElem;
66 
67 typedef struct VRingUsed
68 {
69     uint16_t flags;
70     uint16_t idx;
71     VRingUsedElem ring[];
72 } VRingUsed;
73 
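/*
 * Cached mappings of the three vring areas.  The pointer held in VRing is
 * swapped atomically and old caches are reclaimed through RCU, so lockless
 * readers always see a consistent set of mappings.
 */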
74 typedef struct VRingMemoryRegionCaches {
75     struct rcu_head rcu;
76     MemoryRegionCache desc;
77     MemoryRegionCache avail;
78     MemoryRegionCache used;
79 } VRingMemoryRegionCaches;
80 
81 typedef struct VRing
82 {
83     unsigned int num;
84     unsigned int num_default;
85     unsigned int align;
86     hwaddr desc;
87     hwaddr avail;
88     hwaddr used;
89     VRingMemoryRegionCaches *caches;
90 } VRing;
91 
92 typedef struct VRingPackedDescEvent {
93     uint16_t off_wrap;
94     uint16_t flags;
95 } VRingPackedDescEvent;
96 
97 struct VirtQueue
98 {
99     VRing vring;
100     VirtQueueElement *used_elems;
101 
102     /* Next head to pop */
103     uint16_t last_avail_idx;
104     bool last_avail_wrap_counter;
105 
106     /* Last avail_idx read from VQ. */
107     uint16_t shadow_avail_idx;
108     bool shadow_avail_wrap_counter;
109 
110     uint16_t used_idx;
111     bool used_wrap_counter;
112 
113     /* Last used index value we have signalled on */
114     uint16_t signalled_used;
115 
116     /* Whether signalled_used is valid */
117     bool signalled_used_valid;
118 
119     /* Notification enabled? */
120     bool notification;
121 
122     uint16_t queue_index;
123 
124     unsigned int inuse;
125 
126     uint16_t vector;
127     VirtIOHandleOutput handle_output;
128     VirtIOHandleAIOOutput handle_aio_output;
129     VirtIODevice *vdev;
130     EventNotifier guest_notifier;
131     EventNotifier host_notifier;
132     bool host_notifier_enabled;
133     QLIST_ENTRY(VirtQueue) node;
134 };
135 
136 /* Called within call_rcu().  */
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139     assert(caches != NULL);
140     address_space_cache_destroy(&caches->desc);
141     address_space_cache_destroy(&caches->avail);
142     address_space_cache_destroy(&caches->used);
143     g_free(caches);
144 }
145 
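/*
 * Detach the current region caches from the queue.  Readers still inside an
 * RCU critical section keep using the old mapping until call_rcu() frees it.
 */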
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148     VRingMemoryRegionCaches *caches;
149 
150     caches = qatomic_read(&vq->vring.caches);
151     qatomic_rcu_set(&vq->vring.caches, NULL);
152     if (caches) {
153         call_rcu(caches, virtio_free_region_cache, rcu);
154     }
155 }
156 
157 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
158 {
159     VirtQueue *vq = &vdev->vq[n];
160     VRingMemoryRegionCaches *old = vq->vring.caches;
161     VRingMemoryRegionCaches *new = NULL;
162     hwaddr addr, size;
163     int64_t len;
164     bool packed;
165 
166 
167     addr = vq->vring.desc;
168     if (!addr) {
169         goto out_no_cache;
170     }
171     new = g_new0(VRingMemoryRegionCaches, 1);
172     size = virtio_queue_get_desc_size(vdev, n);
173     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
174                                    true : false;
175     len = address_space_cache_init(&new->desc, vdev->dma_as,
176                                    addr, size, packed);
177     if (len < size) {
178         virtio_error(vdev, "Cannot map desc");
179         goto err_desc;
180     }
181 
182     size = virtio_queue_get_used_size(vdev, n);
183     len = address_space_cache_init(&new->used, vdev->dma_as,
184                                    vq->vring.used, size, true);
185     if (len < size) {
186         virtio_error(vdev, "Cannot map used");
187         goto err_used;
188     }
189 
190     size = virtio_queue_get_avail_size(vdev, n);
191     len = address_space_cache_init(&new->avail, vdev->dma_as,
192                                    vq->vring.avail, size, false);
193     if (len < size) {
194         virtio_error(vdev, "Cannot map avail");
195         goto err_avail;
196     }
197 
198     qatomic_rcu_set(&vq->vring.caches, new);
199     if (old) {
200         call_rcu(old, virtio_free_region_cache, rcu);
201     }
202     return;
203 
204 err_avail:
205     address_space_cache_destroy(&new->avail);
206 err_used:
207     address_space_cache_destroy(&new->used);
208 err_desc:
209     address_space_cache_destroy(&new->desc);
210 out_no_cache:
211     g_free(new);
212     virtio_virtqueue_reset_region_cache(vq);
213 }
214 
215 /* virt queue functions */
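/*
 * Split-ring layout: the avail ring immediately follows the descriptor
 * table, and the used ring starts at the next vring->align boundary after
 * the avail ring.
 */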
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218     VRing *vring = &vdev->vq[n].vring;
219 
220     if (!vring->num || !vring->desc || !vring->align) {
221         /* not yet setup -> nothing to do */
222         return;
223     }
224     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225     vring->used = vring_align(vring->avail +
226                               offsetof(VRingAvail, ring[vring->num]),
227                               vring->align);
228     virtio_init_region_cache(vdev, n);
229 }
230 
231 /* Called within rcu_read_lock().  */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233                                   MemoryRegionCache *cache, int i)
234 {
235     address_space_read_cached(cache, i * sizeof(VRingDesc),
236                               desc, sizeof(VRingDesc));
237     virtio_tswap64s(vdev, &desc->addr);
238     virtio_tswap32s(vdev, &desc->len);
239     virtio_tswap16s(vdev, &desc->flags);
240     virtio_tswap16s(vdev, &desc->next);
241 }
242 
243 static void vring_packed_event_read(VirtIODevice *vdev,
244                                     MemoryRegionCache *cache,
245                                     VRingPackedDescEvent *e)
246 {
247     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
248     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
249 
250     address_space_read_cached(cache, off_flags, &e->flags,
251                               sizeof(e->flags));
252     /* Make sure flags is seen before off_wrap */
253     smp_rmb();
254     address_space_read_cached(cache, off_off, &e->off_wrap,
255                               sizeof(e->off_wrap));
256     virtio_tswap16s(vdev, &e->off_wrap);
257     virtio_tswap16s(vdev, &e->flags);
258 }
259 
260 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
261                                         MemoryRegionCache *cache,
262                                         uint16_t off_wrap)
263 {
264     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
265 
266     virtio_tswap16s(vdev, &off_wrap);
267     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
268     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
269 }
270 
271 static void vring_packed_flags_write(VirtIODevice *vdev,
272                                      MemoryRegionCache *cache, uint16_t flags)
273 {
274     hwaddr off = offsetof(VRingPackedDescEvent, flags);
275 
276     virtio_tswap16s(vdev, &flags);
277     address_space_write_cached(cache, off, &flags, sizeof(flags));
278     address_space_cache_invalidate(cache, off, sizeof(flags));
279 }
280 
281 /* Called within rcu_read_lock().  */
282 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
283 {
284     return qatomic_rcu_read(&vq->vring.caches);
285 }
286 
287 /* Called within rcu_read_lock().  */
288 static inline uint16_t vring_avail_flags(VirtQueue *vq)
289 {
290     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
291     hwaddr pa = offsetof(VRingAvail, flags);
292 
293     if (!caches) {
294         return 0;
295     }
296 
297     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
298 }
299 
300 /* Called within rcu_read_lock().  */
301 static inline uint16_t vring_avail_idx(VirtQueue *vq)
302 {
303     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
304     hwaddr pa = offsetof(VRingAvail, idx);
305 
306     if (!caches) {
307         return 0;
308     }
309 
310     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
311     return vq->shadow_avail_idx;
312 }
313 
314 /* Called within rcu_read_lock().  */
315 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
316 {
317     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
318     hwaddr pa = offsetof(VRingAvail, ring[i]);
319 
320     if (!caches) {
321         return 0;
322     }
323 
324     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
325 }
326 
327 /* Called within rcu_read_lock().  */
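/*
 * With VIRTIO_RING_F_EVENT_IDX the guest stores used_event in the slot just
 * past the avail ring (avail->ring[num]); it tells the device at which used
 * index the guest wants to be interrupted.
 */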
328 static inline uint16_t vring_get_used_event(VirtQueue *vq)
329 {
330     return vring_avail_ring(vq, vq->vring.num);
331 }
332 
333 /* Called within rcu_read_lock().  */
334 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
335                                     int i)
336 {
337     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
338     hwaddr pa = offsetof(VRingUsed, ring[i]);
339 
340     if (!caches) {
341         return;
342     }
343 
344     virtio_tswap32s(vq->vdev, &uelem->id);
345     virtio_tswap32s(vq->vdev, &uelem->len);
346     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
347     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
348 }
349 
350 /* Called within rcu_read_lock().  */
351 static uint16_t vring_used_idx(VirtQueue *vq)
352 {
353     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
354     hwaddr pa = offsetof(VRingUsed, idx);
355 
356     if (!caches) {
357         return 0;
358     }
359 
360     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
361 }
362 
363 /* Called within rcu_read_lock().  */
364 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
365 {
366     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
367     hwaddr pa = offsetof(VRingUsed, idx);
368 
369     if (caches) {
370         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
371         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
372     }
373 
374     vq->used_idx = val;
375 }
376 
377 /* Called within rcu_read_lock().  */
378 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
379 {
380     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
381     VirtIODevice *vdev = vq->vdev;
382     hwaddr pa = offsetof(VRingUsed, flags);
383     uint16_t flags;
384 
385     if (!caches) {
386         return;
387     }
388 
389     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
390     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
391     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
392 }
393 
394 /* Called within rcu_read_lock().  */
395 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
396 {
397     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
398     VirtIODevice *vdev = vq->vdev;
399     hwaddr pa = offsetof(VRingUsed, flags);
400     uint16_t flags;
401 
402     if (!caches) {
403         return;
404     }
405 
406     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
407     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
408     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
409 }
410 
411 /* Called within rcu_read_lock().  */
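/*
 * The avail_event counterpart lives just past the used ring
 * (used->ring[num]); writing it tells the guest at which avail index the
 * device next wants to be kicked.  Skipped while notification is disabled.
 */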
412 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
413 {
414     VRingMemoryRegionCaches *caches;
415     hwaddr pa;
416     if (!vq->notification) {
417         return;
418     }
419 
420     caches = vring_get_region_caches(vq);
421     if (!caches) {
422         return;
423     }
424 
425     pa = offsetof(VRingUsed, ring[vq->vring.num]);
426     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
427     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
428 }
429 
430 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
431 {
432     RCU_READ_LOCK_GUARD();
433 
434     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
435         vring_set_avail_event(vq, vring_avail_idx(vq));
436     } else if (enable) {
437         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
438     } else {
439         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
440     }
441     if (enable) {
442         /* Expose avail event/used flags before caller checks the avail idx. */
443         smp_mb();
444     }
445 }
446 
447 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
448 {
449     uint16_t off_wrap;
450     VRingPackedDescEvent e;
451     VRingMemoryRegionCaches *caches;
452 
453     RCU_READ_LOCK_GUARD();
454     caches = vring_get_region_caches(vq);
455     if (!caches) {
456         return;
457     }
458 
459     vring_packed_event_read(vq->vdev, &caches->used, &e);
460 
461     if (!enable) {
462         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
463     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
464         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
465         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
466         /* Make sure off_wrap is written before flags */
467         smp_wmb();
468         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
469     } else {
470         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
471     }
472 
473     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
474     if (enable) {
475         /* Expose avail event/used flags before caller checks the avail idx. */
476         smp_mb();
477     }
478 }
479 
480 bool virtio_queue_get_notification(VirtQueue *vq)
481 {
482     return vq->notification;
483 }
484 
485 void virtio_queue_set_notification(VirtQueue *vq, int enable)
486 {
487     vq->notification = enable;
488 
489     if (!vq->vring.desc) {
490         return;
491     }
492 
493     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
494         virtio_queue_packed_set_notification(vq, enable);
495     } else {
496         virtio_queue_split_set_notification(vq, enable);
497     }
498 }
499 
500 int virtio_queue_ready(VirtQueue *vq)
501 {
502     return vq->vring.avail != 0;
503 }
504 
505 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
506                                          uint16_t *flags,
507                                          MemoryRegionCache *cache,
508                                          int i)
509 {
510     address_space_read_cached(cache,
511                               i * sizeof(VRingPackedDesc) +
512                               offsetof(VRingPackedDesc, flags),
513                               flags, sizeof(*flags));
514     virtio_tswap16s(vdev, flags);
515 }
516 
517 static void vring_packed_desc_read(VirtIODevice *vdev,
518                                    VRingPackedDesc *desc,
519                                    MemoryRegionCache *cache,
520                                    int i, bool strict_order)
521 {
522     hwaddr off = i * sizeof(VRingPackedDesc);
523 
524     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
525 
526     if (strict_order) {
527         /* Make sure flags is read before the rest of the fields. */
528         smp_rmb();
529     }
530 
531     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
532                               &desc->addr, sizeof(desc->addr));
533     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
534                               &desc->id, sizeof(desc->id));
535     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
536                               &desc->len, sizeof(desc->len));
537     virtio_tswap64s(vdev, &desc->addr);
538     virtio_tswap16s(vdev, &desc->id);
539     virtio_tswap32s(vdev, &desc->len);
540 }
541 
542 static void vring_packed_desc_write_data(VirtIODevice *vdev,
543                                          VRingPackedDesc *desc,
544                                          MemoryRegionCache *cache,
545                                          int i)
546 {
547     hwaddr off_id = i * sizeof(VRingPackedDesc) +
548                     offsetof(VRingPackedDesc, id);
549     hwaddr off_len = i * sizeof(VRingPackedDesc) +
550                     offsetof(VRingPackedDesc, len);
551 
552     virtio_tswap32s(vdev, &desc->len);
553     virtio_tswap16s(vdev, &desc->id);
554     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
555     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
556     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
557     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
558 }
559 
560 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
561                                           VRingPackedDesc *desc,
562                                           MemoryRegionCache *cache,
563                                           int i)
564 {
565     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
566 
567     virtio_tswap16s(vdev, &desc->flags);
568     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
569     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
570 }
571 
572 static void vring_packed_desc_write(VirtIODevice *vdev,
573                                     VRingPackedDesc *desc,
574                                     MemoryRegionCache *cache,
575                                     int i, bool strict_order)
576 {
577     vring_packed_desc_write_data(vdev, desc, cache, i);
578     if (strict_order) {
579         /* Make sure data is written before flags. */
580         smp_wmb();
581     }
582     vring_packed_desc_write_flags(vdev, desc, cache, i);
583 }
584 
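/*
 * Packed ring: a descriptor is available to the device when its AVAIL and
 * USED flag bits differ and AVAIL matches the wrap counter we expect.
 */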
585 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
586 {
587     bool avail, used;
588 
589     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
590     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
591     return (avail != used) && (avail == wrap_counter);
592 }
593 
594 /* Fetch avail_idx from VQ memory only when we really need to know if
595  * the guest has added some buffers.
596  * Called within rcu_read_lock().  */
597 static int virtio_queue_empty_rcu(VirtQueue *vq)
598 {
599     if (virtio_device_disabled(vq->vdev)) {
600         return 1;
601     }
602 
603     if (unlikely(!vq->vring.avail)) {
604         return 1;
605     }
606 
607     if (vq->shadow_avail_idx != vq->last_avail_idx) {
608         return 0;
609     }
610 
611     return vring_avail_idx(vq) == vq->last_avail_idx;
612 }
613 
614 static int virtio_queue_split_empty(VirtQueue *vq)
615 {
616     bool empty;
617 
618     if (virtio_device_disabled(vq->vdev)) {
619         return 1;
620     }
621 
622     if (unlikely(!vq->vring.avail)) {
623         return 1;
624     }
625 
626     if (vq->shadow_avail_idx != vq->last_avail_idx) {
627         return 0;
628     }
629 
630     RCU_READ_LOCK_GUARD();
631     empty = vring_avail_idx(vq) == vq->last_avail_idx;
632     return empty;
633 }
634 
635 /* Called within rcu_read_lock().  */
636 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
637 {
638     struct VRingPackedDesc desc;
639     VRingMemoryRegionCaches *cache;
640 
641     if (unlikely(!vq->vring.desc)) {
642         return 1;
643     }
644 
645     cache = vring_get_region_caches(vq);
646     if (!cache) {
647         return 1;
648     }
649 
650     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
651                                  vq->last_avail_idx);
652 
653     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
654 }
655 
656 static int virtio_queue_packed_empty(VirtQueue *vq)
657 {
658     RCU_READ_LOCK_GUARD();
659     return virtio_queue_packed_empty_rcu(vq);
660 }
661 
662 int virtio_queue_empty(VirtQueue *vq)
663 {
664     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
665         return virtio_queue_packed_empty(vq);
666     } else {
667         return virtio_queue_split_empty(vq);
668     }
669 }
670 
671 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
672                                unsigned int len)
673 {
674     AddressSpace *dma_as = vq->vdev->dma_as;
675     unsigned int offset;
676     int i;
677 
678     offset = 0;
679     for (i = 0; i < elem->in_num; i++) {
680         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
681 
682         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
683                          elem->in_sg[i].iov_len,
684                          DMA_DIRECTION_FROM_DEVICE, size);
685 
686         offset += size;
687     }
688 
689     for (i = 0; i < elem->out_num; i++)
690         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
691                          elem->out_sg[i].iov_len,
692                          DMA_DIRECTION_TO_DEVICE,
693                          elem->out_sg[i].iov_len);
694 }
695 
696 /* virtqueue_detach_element:
697  * @vq: The #VirtQueue
698  * @elem: The #VirtQueueElement
699  * @len: number of bytes written
700  *
701  * Detach the element from the virtqueue.  This function is suitable for device
702  * reset or other situations where a #VirtQueueElement is simply freed and will
703  * not be pushed or discarded.
704  */
705 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
706                               unsigned int len)
707 {
708     vq->inuse -= elem->ndescs;
709     virtqueue_unmap_sg(vq, elem, len);
710 }
711 
712 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
713 {
714     vq->last_avail_idx -= num;
715 }
716 
717 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
718 {
719     if (vq->last_avail_idx < num) {
720         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
721         vq->last_avail_wrap_counter ^= 1;
722     } else {
723         vq->last_avail_idx -= num;
724     }
725 }
726 
727 /* virtqueue_unpop:
728  * @vq: The #VirtQueue
729  * @elem: The #VirtQueueElement
730  * @len: number of bytes written
731  *
732  * Pretend the most recent element wasn't popped from the virtqueue.  The next
733  * call to virtqueue_pop() will refetch the element.
734  */
735 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
736                      unsigned int len)
737 {
738 
739     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
740         virtqueue_packed_rewind(vq, 1);
741     } else {
742         virtqueue_split_rewind(vq, 1);
743     }
744 
745     virtqueue_detach_element(vq, elem, len);
746 }
747 
748 /* virtqueue_rewind:
749  * @vq: The #VirtQueue
750  * @num: Number of elements to push back
751  *
752  * Pretend that elements weren't popped from the virtqueue.  The next
753  * virtqueue_pop() will refetch the oldest element.
754  *
755  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
756  *
757  * Returns: true on success, false if @num is greater than the number of in use
758  * elements.
759  */
760 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
761 {
762     if (num > vq->inuse) {
763         return false;
764     }
765 
766     vq->inuse -= num;
767     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
768         virtqueue_packed_rewind(vq, num);
769     } else {
770         virtqueue_split_rewind(vq, num);
771     }
772     return true;
773 }
774 
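/*
 * Write one used-ring entry (head descriptor index + bytes written) at slot
 * (used_idx + idx) % num.  The guest does not see it until virtqueue_flush()
 * advances used->idx.
 */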
775 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
776                     unsigned int len, unsigned int idx)
777 {
778     VRingUsedElem uelem;
779 
780     if (unlikely(!vq->vring.used)) {
781         return;
782     }
783 
784     idx = (idx + vq->used_idx) % vq->vring.num;
785 
786     uelem.id = elem->index;
787     uelem.len = len;
788     vring_used_write(vq, &uelem, idx);
789 }
790 
791 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
792                                   unsigned int len, unsigned int idx)
793 {
794     vq->used_elems[idx].index = elem->index;
795     vq->used_elems[idx].len = len;
796     vq->used_elems[idx].ndescs = elem->ndescs;
797 }
798 
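/*
 * Write back a used descriptor (id + written length) at used_idx + idx,
 * with the AVAIL/USED flag bits set to match the used wrap counter so the
 * guest observes it as used.  When strict_order is set, a write barrier
 * ensures the id/len are visible before the flags.
 */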
799 static void virtqueue_packed_fill_desc(VirtQueue *vq,
800                                        const VirtQueueElement *elem,
801                                        unsigned int idx,
802                                        bool strict_order)
803 {
804     uint16_t head;
805     VRingMemoryRegionCaches *caches;
806     VRingPackedDesc desc = {
807         .id = elem->index,
808         .len = elem->len,
809     };
810     bool wrap_counter = vq->used_wrap_counter;
811 
812     if (unlikely(!vq->vring.desc)) {
813         return;
814     }
815 
816     head = vq->used_idx + idx;
817     if (head >= vq->vring.num) {
818         head -= vq->vring.num;
819         wrap_counter ^= 1;
820     }
821     if (wrap_counter) {
822         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
823         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
824     } else {
825         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
826         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
827     }
828 
829     caches = vring_get_region_caches(vq);
830     if (!caches) {
831         return;
832     }
833 
834     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
835 }
836 
837 /* Called within rcu_read_lock().  */
838 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
839                     unsigned int len, unsigned int idx)
840 {
841     trace_virtqueue_fill(vq, elem, len, idx);
842 
843     virtqueue_unmap_sg(vq, elem, len);
844 
845     if (virtio_device_disabled(vq->vdev)) {
846         return;
847     }
848 
849     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
850         virtqueue_packed_fill(vq, elem, len, idx);
851     } else {
852         virtqueue_split_fill(vq, elem, len, idx);
853     }
854 }
855 
856 /* Called within rcu_read_lock().  */
857 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
858 {
859     uint16_t old, new;
860 
861     if (unlikely(!vq->vring.used)) {
862         return;
863     }
864 
865     /* Make sure buffer is written before we update index. */
866     smp_wmb();
867     trace_virtqueue_flush(vq, count);
868     old = vq->used_idx;
869     new = old + count;
870     vring_used_idx_set(vq, new);
871     vq->inuse -= count;
872     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
873         vq->signalled_used_valid = false;
874 }
875 
876 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
877 {
878     unsigned int i, ndescs = 0;
879 
880     if (unlikely(!vq->vring.desc)) {
881         return;
882     }
883 
884     for (i = 1; i < count; i++) {
885         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
886         ndescs += vq->used_elems[i].ndescs;
887     }
888     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
889     ndescs += vq->used_elems[0].ndescs;
890 
891     vq->inuse -= ndescs;
892     vq->used_idx += ndescs;
893     if (vq->used_idx >= vq->vring.num) {
894         vq->used_idx -= vq->vring.num;
895         vq->used_wrap_counter ^= 1;
896     }
897 }
898 
899 void virtqueue_flush(VirtQueue *vq, unsigned int count)
900 {
901     if (virtio_device_disabled(vq->vdev)) {
902         vq->inuse -= count;
903         return;
904     }
905 
906     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
907         virtqueue_packed_flush(vq, count);
908     } else {
909         virtqueue_split_flush(vq, count);
910     }
911 }
912 
913 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
914                     unsigned int len)
915 {
916     RCU_READ_LOCK_GUARD();
917     virtqueue_fill(vq, elem, len, 0);
918     virtqueue_flush(vq, 1);
919 }
920 
921 /* Called within rcu_read_lock().  */
922 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
923 {
924     uint16_t num_heads = vring_avail_idx(vq) - idx;
925 
926     /* Check it isn't doing very strange things with descriptor numbers. */
927     if (num_heads > vq->vring.num) {
928         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
929                      idx, vq->shadow_avail_idx);
930         return -EINVAL;
931     }
932     /* On success, callers read a descriptor at vq->last_avail_idx.
933      * Make sure descriptor read does not bypass avail index read. */
934     if (num_heads) {
935         smp_rmb();
936     }
937 
938     return num_heads;
939 }
940 
941 /* Called within rcu_read_lock().  */
942 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
943                                unsigned int *head)
944 {
945     /* Grab the next descriptor number they're advertising, and increment
946      * the index we've seen. */
947     *head = vring_avail_ring(vq, idx % vq->vring.num);
948 
949     /* If their number is silly, that's a fatal mistake. */
950     if (*head >= vq->vring.num) {
951         virtio_error(vq->vdev, "Guest says index %u is available", *head);
952         return false;
953     }
954 
955     return true;
956 }
957 
958 enum {
959     VIRTQUEUE_READ_DESC_ERROR = -1,
960     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
961     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
962 };
963 
964 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
965                                           MemoryRegionCache *desc_cache,
966                                           unsigned int max, unsigned int *next)
967 {
968     /* If this descriptor says it doesn't chain, we're done. */
969     if (!(desc->flags & VRING_DESC_F_NEXT)) {
970         return VIRTQUEUE_READ_DESC_DONE;
971     }
972 
973     /* Check they're not leading us off the end of the descriptor table. */
974     *next = desc->next;
975     /* Make sure compiler knows to grab that: we don't want it changing! */
976     smp_wmb();
977 
978     if (*next >= max) {
979         virtio_error(vdev, "Desc next is %u", *next);
980         return VIRTQUEUE_READ_DESC_ERROR;
981     }
982 
983     vring_split_desc_read(vdev, desc, desc_cache, *next);
984     return VIRTQUEUE_READ_DESC_MORE;
985 }
986 
987 /* Called within rcu_read_lock().  */
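/*
 * Walk all pending descriptor chains starting at last_avail_idx, summing
 * device-writable ("in") and device-readable ("out") bytes and descending
 * into indirect tables.  Stops early once both requested maxima are reached;
 * a malformed ring reports a virtio_error and yields zero totals.
 */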
988 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
989                             unsigned int *in_bytes, unsigned int *out_bytes,
990                             unsigned max_in_bytes, unsigned max_out_bytes,
991                             VRingMemoryRegionCaches *caches)
992 {
993     VirtIODevice *vdev = vq->vdev;
994     unsigned int max, idx;
995     unsigned int total_bufs, in_total, out_total;
996     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
997     int64_t len = 0;
998     int rc;
999 
1000     idx = vq->last_avail_idx;
1001     total_bufs = in_total = out_total = 0;
1002 
1003     max = vq->vring.num;
1004 
1005     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1006         MemoryRegionCache *desc_cache = &caches->desc;
1007         unsigned int num_bufs;
1008         VRingDesc desc;
1009         unsigned int i;
1010 
1011         num_bufs = total_bufs;
1012 
1013         if (!virtqueue_get_head(vq, idx++, &i)) {
1014             goto err;
1015         }
1016 
1017         vring_split_desc_read(vdev, &desc, desc_cache, i);
1018 
1019         if (desc.flags & VRING_DESC_F_INDIRECT) {
1020             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1021                 virtio_error(vdev, "Invalid size for indirect buffer table");
1022                 goto err;
1023             }
1024 
1025             /* If we've got too many, that implies a descriptor loop. */
1026             if (num_bufs >= max) {
1027                 virtio_error(vdev, "Looped descriptor");
1028                 goto err;
1029             }
1030 
1031             /* loop over the indirect descriptor table */
1032             len = address_space_cache_init(&indirect_desc_cache,
1033                                            vdev->dma_as,
1034                                            desc.addr, desc.len, false);
1035             desc_cache = &indirect_desc_cache;
1036             if (len < desc.len) {
1037                 virtio_error(vdev, "Cannot map indirect buffer");
1038                 goto err;
1039             }
1040 
1041             max = desc.len / sizeof(VRingDesc);
1042             num_bufs = i = 0;
1043             vring_split_desc_read(vdev, &desc, desc_cache, i);
1044         }
1045 
1046         do {
1047             /* If we've got too many, that implies a descriptor loop. */
1048             if (++num_bufs > max) {
1049                 virtio_error(vdev, "Looped descriptor");
1050                 goto err;
1051             }
1052 
1053             if (desc.flags & VRING_DESC_F_WRITE) {
1054                 in_total += desc.len;
1055             } else {
1056                 out_total += desc.len;
1057             }
1058             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1059                 goto done;
1060             }
1061 
1062             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1063         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1064 
1065         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1066             goto err;
1067         }
1068 
1069         if (desc_cache == &indirect_desc_cache) {
1070             address_space_cache_destroy(&indirect_desc_cache);
1071             total_bufs++;
1072         } else {
1073             total_bufs = num_bufs;
1074         }
1075     }
1076 
1077     if (rc < 0) {
1078         goto err;
1079     }
1080 
1081 done:
1082     address_space_cache_destroy(&indirect_desc_cache);
1083     if (in_bytes) {
1084         *in_bytes = in_total;
1085     }
1086     if (out_bytes) {
1087         *out_bytes = out_total;
1088     }
1089     return;
1090 
1091 err:
1092     in_total = out_total = 0;
1093     goto done;
1094 }
1095 
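/*
 * Advance to the next descriptor of a packed chain.  Non-indirect chains
 * continue only while VRING_DESC_F_NEXT is set and wrap at vring.num;
 * indirect tables end at their last entry instead of wrapping.
 */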
1096 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1097                                            VRingPackedDesc *desc,
1098                                            MemoryRegionCache
1099                                            *desc_cache,
1100                                            unsigned int max,
1101                                            unsigned int *next,
1102                                            bool indirect)
1103 {
1104     /* If this descriptor says it doesn't chain, we're done. */
1105     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1106         return VIRTQUEUE_READ_DESC_DONE;
1107     }
1108 
1109     ++*next;
1110     if (*next == max) {
1111         if (indirect) {
1112             return VIRTQUEUE_READ_DESC_DONE;
1113         } else {
1114             (*next) -= vq->vring.num;
1115         }
1116     }
1117 
1118     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1119     return VIRTQUEUE_READ_DESC_MORE;
1120 }
1121 
1122 /* Called within rcu_read_lock().  */
1123 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1124                                              unsigned int *in_bytes,
1125                                              unsigned int *out_bytes,
1126                                              unsigned max_in_bytes,
1127                                              unsigned max_out_bytes,
1128                                              VRingMemoryRegionCaches *caches)
1129 {
1130     VirtIODevice *vdev = vq->vdev;
1131     unsigned int max, idx;
1132     unsigned int total_bufs, in_total, out_total;
1133     MemoryRegionCache *desc_cache;
1134     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1135     int64_t len = 0;
1136     VRingPackedDesc desc;
1137     bool wrap_counter;
1138 
1139     idx = vq->last_avail_idx;
1140     wrap_counter = vq->last_avail_wrap_counter;
1141     total_bufs = in_total = out_total = 0;
1142 
1143     max = vq->vring.num;
1144 
1145     for (;;) {
1146         unsigned int num_bufs = total_bufs;
1147         unsigned int i = idx;
1148         int rc;
1149 
1150         desc_cache = &caches->desc;
1151         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1152         if (!is_desc_avail(desc.flags, wrap_counter)) {
1153             break;
1154         }
1155 
1156         if (desc.flags & VRING_DESC_F_INDIRECT) {
1157             if (desc.len % sizeof(VRingPackedDesc)) {
1158                 virtio_error(vdev, "Invalid size for indirect buffer table");
1159                 goto err;
1160             }
1161 
1162             /* If we've got too many, that implies a descriptor loop. */
1163             if (num_bufs >= max) {
1164                 virtio_error(vdev, "Looped descriptor");
1165                 goto err;
1166             }
1167 
1168             /* loop over the indirect descriptor table */
1169             len = address_space_cache_init(&indirect_desc_cache,
1170                                            vdev->dma_as,
1171                                            desc.addr, desc.len, false);
1172             desc_cache = &indirect_desc_cache;
1173             if (len < desc.len) {
1174                 virtio_error(vdev, "Cannot map indirect buffer");
1175                 goto err;
1176             }
1177 
1178             max = desc.len / sizeof(VRingPackedDesc);
1179             num_bufs = i = 0;
1180             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1181         }
1182 
1183         do {
1184             /* If we've got too many, that implies a descriptor loop. */
1185             if (++num_bufs > max) {
1186                 virtio_error(vdev, "Looped descriptor");
1187                 goto err;
1188             }
1189 
1190             if (desc.flags & VRING_DESC_F_WRITE) {
1191                 in_total += desc.len;
1192             } else {
1193                 out_total += desc.len;
1194             }
1195             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1196                 goto done;
1197             }
1198 
1199             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1200                                                  &i, desc_cache ==
1201                                                  &indirect_desc_cache);
1202         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1203 
1204         if (desc_cache == &indirect_desc_cache) {
1205             address_space_cache_destroy(&indirect_desc_cache);
1206             total_bufs++;
1207             idx++;
1208         } else {
1209             idx += num_bufs - total_bufs;
1210             total_bufs = num_bufs;
1211         }
1212 
1213         if (idx >= vq->vring.num) {
1214             idx -= vq->vring.num;
1215             wrap_counter ^= 1;
1216         }
1217     }
1218 
1219     /* Record the index and wrap counter for a kick we want */
1220     vq->shadow_avail_idx = idx;
1221     vq->shadow_avail_wrap_counter = wrap_counter;
1222 done:
1223     address_space_cache_destroy(&indirect_desc_cache);
1224     if (in_bytes) {
1225         *in_bytes = in_total;
1226     }
1227     if (out_bytes) {
1228         *out_bytes = out_total;
1229     }
1230     return;
1231 
1232 err:
1233     in_total = out_total = 0;
1234     goto done;
1235 }
1236 
1237 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1238                                unsigned int *out_bytes,
1239                                unsigned max_in_bytes, unsigned max_out_bytes)
1240 {
1241     uint16_t desc_size;
1242     VRingMemoryRegionCaches *caches;
1243 
1244     RCU_READ_LOCK_GUARD();
1245 
1246     if (unlikely(!vq->vring.desc)) {
1247         goto err;
1248     }
1249 
1250     caches = vring_get_region_caches(vq);
1251     if (!caches) {
1252         goto err;
1253     }
1254 
1255     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1256                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1257     if (caches->desc.len < vq->vring.num * desc_size) {
1258         virtio_error(vq->vdev, "Cannot map descriptor ring");
1259         goto err;
1260     }
1261 
1262     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1263         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1264                                          max_in_bytes, max_out_bytes,
1265                                          caches);
1266     } else {
1267         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1268                                         max_in_bytes, max_out_bytes,
1269                                         caches);
1270     }
1271 
1272     return;
1273 err:
1274     if (in_bytes) {
1275         *in_bytes = 0;
1276     }
1277     if (out_bytes) {
1278         *out_bytes = 0;
1279     }
1280 }
1281 
1282 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1283                           unsigned int out_bytes)
1284 {
1285     unsigned int in_total, out_total;
1286 
1287     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1288     return in_bytes <= in_total && out_bytes <= out_total;
1289 }
1290 
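/*
 * Map one guest-physical buffer into host iovec entries.  dma_memory_map()
 * may map less than requested (e.g. when the buffer spans a region
 * boundary), so keep mapping the remainder until the buffer is covered or
 * max_num_sg entries are used up.
 */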
1291 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1292                                hwaddr *addr, struct iovec *iov,
1293                                unsigned int max_num_sg, bool is_write,
1294                                hwaddr pa, size_t sz)
1295 {
1296     bool ok = false;
1297     unsigned num_sg = *p_num_sg;
1298     assert(num_sg <= max_num_sg);
1299 
1300     if (!sz) {
1301         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1302         goto out;
1303     }
1304 
1305     while (sz) {
1306         hwaddr len = sz;
1307 
1308         if (num_sg == max_num_sg) {
1309             virtio_error(vdev, "virtio: too many write descriptors in "
1310                                "indirect table");
1311             goto out;
1312         }
1313 
1314         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1315                                               is_write ?
1316                                               DMA_DIRECTION_FROM_DEVICE :
1317                                               DMA_DIRECTION_TO_DEVICE);
1318         if (!iov[num_sg].iov_base) {
1319             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1320             goto out;
1321         }
1322 
1323         iov[num_sg].iov_len = len;
1324         addr[num_sg] = pa;
1325 
1326         sz -= len;
1327         pa += len;
1328         num_sg++;
1329     }
1330     ok = true;
1331 
1332 out:
1333     *p_num_sg = num_sg;
1334     return ok;
1335 }
1336 
1337 /* Only used by error code paths before we have a VirtQueueElement (therefore
1338  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1339  * yet.
1340  */
1341 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1342                                     struct iovec *iov)
1343 {
1344     unsigned int i;
1345 
1346     for (i = 0; i < out_num + in_num; i++) {
1347         int is_write = i >= out_num;
1348 
1349         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1350         iov++;
1351     }
1352 }
1353 
1354 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1355                                 hwaddr *addr, unsigned int num_sg,
1356                                 bool is_write)
1357 {
1358     unsigned int i;
1359     hwaddr len;
1360 
1361     for (i = 0; i < num_sg; i++) {
1362         len = sg[i].iov_len;
1363         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1364                                         addr[i], &len, is_write ?
1365                                         DMA_DIRECTION_FROM_DEVICE :
1366                                         DMA_DIRECTION_TO_DEVICE);
1367         if (!sg[i].iov_base) {
1368             error_report("virtio: error trying to map MMIO memory");
1369             exit(1);
1370         }
1371         if (len != sg[i].iov_len) {
1372             error_report("virtio: unexpected memory split");
1373             exit(1);
1374         }
1375     }
1376 }
1377 
1378 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1379 {
1380     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1381     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1382                                                                         false);
1383 }
1384 
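/*
 * Allocate a VirtQueueElement (or a larger device-specific struct of size sz
 * that embeds it at offset 0) together with its in/out address and iovec
 * arrays in a single allocation, with suitable alignment padding between
 * them.
 */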
1385 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1386 {
1387     VirtQueueElement *elem;
1388     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1389     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1390     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1391     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1392     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1393     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1394 
1395     assert(sz >= sizeof(VirtQueueElement));
1396     elem = g_malloc(out_sg_end);
1397     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1398     elem->out_num = out_num;
1399     elem->in_num = in_num;
1400     elem->in_addr = (void *)elem + in_addr_ofs;
1401     elem->out_addr = (void *)elem + out_addr_ofs;
1402     elem->in_sg = (void *)elem + in_sg_ofs;
1403     elem->out_sg = (void *)elem + out_sg_ofs;
1404     return elem;
1405 }
1406 
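/*
 * Pop one element from a split ring: fetch the head from the avail ring,
 * walk the (possibly indirect) descriptor chain mapping each buffer, then
 * copy the result into a freshly allocated VirtQueueElement.  Returns NULL
 * if the queue is empty or on error (after unmapping anything mapped).
 */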
1407 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1408 {
1409     unsigned int i, head, max;
1410     VRingMemoryRegionCaches *caches;
1411     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1412     MemoryRegionCache *desc_cache;
1413     int64_t len;
1414     VirtIODevice *vdev = vq->vdev;
1415     VirtQueueElement *elem = NULL;
1416     unsigned out_num, in_num, elem_entries;
1417     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1418     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1419     VRingDesc desc;
1420     int rc;
1421 
1422     RCU_READ_LOCK_GUARD();
1423     if (virtio_queue_empty_rcu(vq)) {
1424         goto done;
1425     }
1426     /* Needed after virtio_queue_empty(), see comment in
1427      * virtqueue_num_heads(). */
1428     smp_rmb();
1429 
1430     /* When we start there is neither input nor output. */
1431     out_num = in_num = elem_entries = 0;
1432 
1433     max = vq->vring.num;
1434 
1435     if (vq->inuse >= vq->vring.num) {
1436         virtio_error(vdev, "Virtqueue size exceeded");
1437         goto done;
1438     }
1439 
1440     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1441         goto done;
1442     }
1443 
1444     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1445         vring_set_avail_event(vq, vq->last_avail_idx);
1446     }
1447 
1448     i = head;
1449 
1450     caches = vring_get_region_caches(vq);
1451     if (!caches) {
1452         virtio_error(vdev, "Region caches not initialized");
1453         goto done;
1454     }
1455 
1456     if (caches->desc.len < max * sizeof(VRingDesc)) {
1457         virtio_error(vdev, "Cannot map descriptor ring");
1458         goto done;
1459     }
1460 
1461     desc_cache = &caches->desc;
1462     vring_split_desc_read(vdev, &desc, desc_cache, i);
1463     if (desc.flags & VRING_DESC_F_INDIRECT) {
1464         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1465             virtio_error(vdev, "Invalid size for indirect buffer table");
1466             goto done;
1467         }
1468 
1469         /* loop over the indirect descriptor table */
1470         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1471                                        desc.addr, desc.len, false);
1472         desc_cache = &indirect_desc_cache;
1473         if (len < desc.len) {
1474             virtio_error(vdev, "Cannot map indirect buffer");
1475             goto done;
1476         }
1477 
1478         max = desc.len / sizeof(VRingDesc);
1479         i = 0;
1480         vring_split_desc_read(vdev, &desc, desc_cache, i);
1481     }
1482 
1483     /* Collect all the descriptors */
1484     do {
1485         bool map_ok;
1486 
1487         if (desc.flags & VRING_DESC_F_WRITE) {
1488             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1489                                         iov + out_num,
1490                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1491                                         desc.addr, desc.len);
1492         } else {
1493             if (in_num) {
1494                 virtio_error(vdev, "Incorrect order for descriptors");
1495                 goto err_undo_map;
1496             }
1497             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1498                                         VIRTQUEUE_MAX_SIZE, false,
1499                                         desc.addr, desc.len);
1500         }
1501         if (!map_ok) {
1502             goto err_undo_map;
1503         }
1504 
1505         /* If we've got too many, that implies a descriptor loop. */
1506         if (++elem_entries > max) {
1507             virtio_error(vdev, "Looped descriptor");
1508             goto err_undo_map;
1509         }
1510 
1511         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1512     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1513 
1514     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1515         goto err_undo_map;
1516     }
1517 
1518     /* Now copy what we have collected and mapped */
1519     elem = virtqueue_alloc_element(sz, out_num, in_num);
1520     elem->index = head;
1521     elem->ndescs = 1;
1522     for (i = 0; i < out_num; i++) {
1523         elem->out_addr[i] = addr[i];
1524         elem->out_sg[i] = iov[i];
1525     }
1526     for (i = 0; i < in_num; i++) {
1527         elem->in_addr[i] = addr[out_num + i];
1528         elem->in_sg[i] = iov[out_num + i];
1529     }
1530 
1531     vq->inuse++;
1532 
1533     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1534 done:
1535     address_space_cache_destroy(&indirect_desc_cache);
1536 
1537     return elem;
1538 
1539 err_undo_map:
1540     virtqueue_undo_map_desc(out_num, in_num, iov);
1541     goto done;
1542 }
1543 
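/*
 * Pop one element from a packed ring: descriptors are consumed in ring
 * order starting at last_avail_idx, the buffer id read from the chain
 * becomes elem->index, and last_avail_idx plus the wrap counter advance by
 * the number of descriptors consumed.
 */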
1544 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1545 {
1546     unsigned int i, max;
1547     VRingMemoryRegionCaches *caches;
1548     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1549     MemoryRegionCache *desc_cache;
1550     int64_t len;
1551     VirtIODevice *vdev = vq->vdev;
1552     VirtQueueElement *elem = NULL;
1553     unsigned out_num, in_num, elem_entries;
1554     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1555     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1556     VRingPackedDesc desc;
1557     uint16_t id;
1558     int rc;
1559 
1560     RCU_READ_LOCK_GUARD();
1561     if (virtio_queue_packed_empty_rcu(vq)) {
1562         goto done;
1563     }
1564 
1565     /* When we start there is neither input nor output. */
1566     out_num = in_num = elem_entries = 0;
1567 
1568     max = vq->vring.num;
1569 
1570     if (vq->inuse >= vq->vring.num) {
1571         virtio_error(vdev, "Virtqueue size exceeded");
1572         goto done;
1573     }
1574 
1575     i = vq->last_avail_idx;
1576 
1577     caches = vring_get_region_caches(vq);
1578     if (!caches) {
1579         virtio_error(vdev, "Region caches not initialized");
1580         goto done;
1581     }
1582 
1583     if (caches->desc.len < max * sizeof(VRingDesc)) {
1584         virtio_error(vdev, "Cannot map descriptor ring");
1585         goto done;
1586     }
1587 
1588     desc_cache = &caches->desc;
1589     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1590     id = desc.id;
1591     if (desc.flags & VRING_DESC_F_INDIRECT) {
1592         if (desc.len % sizeof(VRingPackedDesc)) {
1593             virtio_error(vdev, "Invalid size for indirect buffer table");
1594             goto done;
1595         }
1596 
1597         /* loop over the indirect descriptor table */
1598         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1599                                        desc.addr, desc.len, false);
1600         desc_cache = &indirect_desc_cache;
1601         if (len < desc.len) {
1602             virtio_error(vdev, "Cannot map indirect buffer");
1603             goto done;
1604         }
1605 
1606         max = desc.len / sizeof(VRingPackedDesc);
1607         i = 0;
1608         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1609     }
1610 
1611     /* Collect all the descriptors */
1612     do {
1613         bool map_ok;
1614 
1615         if (desc.flags & VRING_DESC_F_WRITE) {
1616             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1617                                         iov + out_num,
1618                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1619                                         desc.addr, desc.len);
1620         } else {
1621             if (in_num) {
1622                 virtio_error(vdev, "Incorrect order for descriptors");
1623                 goto err_undo_map;
1624             }
1625             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1626                                         VIRTQUEUE_MAX_SIZE, false,
1627                                         desc.addr, desc.len);
1628         }
1629         if (!map_ok) {
1630             goto err_undo_map;
1631         }
1632 
1633         /* If we've got too many, that implies a descriptor loop. */
1634         if (++elem_entries > max) {
1635             virtio_error(vdev, "Looped descriptor");
1636             goto err_undo_map;
1637         }
1638 
1639         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1640                                              desc_cache ==
1641                                              &indirect_desc_cache);
1642     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1643 
1644     /* Now copy what we have collected and mapped */
1645     elem = virtqueue_alloc_element(sz, out_num, in_num);
1646     for (i = 0; i < out_num; i++) {
1647         elem->out_addr[i] = addr[i];
1648         elem->out_sg[i] = iov[i];
1649     }
1650     for (i = 0; i < in_num; i++) {
1651         elem->in_addr[i] = addr[out_num + i];
1652         elem->in_sg[i] = iov[out_num + i];
1653     }
1654 
1655     elem->index = id;
1656     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1657     vq->last_avail_idx += elem->ndescs;
1658     vq->inuse += elem->ndescs;
1659 
1660     if (vq->last_avail_idx >= vq->vring.num) {
1661         vq->last_avail_idx -= vq->vring.num;
1662         vq->last_avail_wrap_counter ^= 1;
1663     }
1664 
1665     vq->shadow_avail_idx = vq->last_avail_idx;
1666     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1667 
1668     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1669 done:
1670     address_space_cache_destroy(&indirect_desc_cache);
1671 
1672     return elem;
1673 
1674 err_undo_map:
1675     virtqueue_undo_map_desc(out_num, in_num, iov);
1676     goto done;
1677 }
1678 
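/*
 * Pop the next available buffer chain from @vq as a VirtQueueElement,
 * dispatching to the packed or split ring implementation depending on the
 * negotiated VIRTIO_F_RING_PACKED feature.  Returns NULL if the device is
 * disabled or nothing is available.
 */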
1679 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1680 {
1681     if (virtio_device_disabled(vq->vdev)) {
1682         return NULL;
1683     }
1684 
1685     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1686         return virtqueue_packed_pop(vq, sz);
1687     } else {
1688         return virtqueue_split_pop(vq, sz);
1689     }
1690 }
1691 
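/*
 * Walk the packed ring and mark every outstanding available descriptor
 * chain as used without mapping or copying any guest memory.  Guest
 * notifications are disabled for the queue before dropping.  Returns the
 * number of chains dropped.
 */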
1692 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1693 {
1694     VRingMemoryRegionCaches *caches;
1695     MemoryRegionCache *desc_cache;
1696     unsigned int dropped = 0;
1697     VirtQueueElement elem = {};
1698     VirtIODevice *vdev = vq->vdev;
1699     VRingPackedDesc desc;
1700 
1701     RCU_READ_LOCK_GUARD();
1702 
1703     caches = vring_get_region_caches(vq);
1704     if (!caches) {
1705         return 0;
1706     }
1707 
1708     desc_cache = &caches->desc;
1709 
1710     virtio_queue_set_notification(vq, 0);
1711 
1712     while (vq->inuse < vq->vring.num) {
1713         unsigned int idx = vq->last_avail_idx;
1714         /*
1715          * Works similarly to virtqueue_pop() but does not map buffers
1716          * and does not allocate any memory.
1717          */
1718         vring_packed_desc_read(vdev, &desc, desc_cache,
1719                                vq->last_avail_idx, true);
1720         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1721             break;
1722         }
1723         elem.index = desc.id;
1724         elem.ndescs = 1;
1725         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1726                                                vq->vring.num, &idx, false)) {
1727             ++elem.ndescs;
1728         }
1729         /*
1730          * immediately push the element, nothing to unmap
1731          * as both in_num and out_num are set to 0.
1732          */
1733         virtqueue_push(vq, &elem, 0);
1734         dropped++;
1735         vq->last_avail_idx += elem.ndescs;
1736         if (vq->last_avail_idx >= vq->vring.num) {
1737             vq->last_avail_idx -= vq->vring.num;
1738             vq->last_avail_wrap_counter ^= 1;
1739         }
1740     }
1741 
1742     return dropped;
1743 }
1744 
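/*
 * Split-ring counterpart of virtqueue_packed_drop_all(): pops each
 * available head and immediately pushes it back as used with a zero
 * written length.
 */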
1745 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1746 {
1747     unsigned int dropped = 0;
1748     VirtQueueElement elem = {};
1749     VirtIODevice *vdev = vq->vdev;
1750     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1751 
1752     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1753         /* Works similarly to virtqueue_pop() but does not map buffers
1754          * and does not allocate any memory. */
1755         smp_rmb();
1756         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1757             break;
1758         }
1759         vq->inuse++;
1760         vq->last_avail_idx++;
1761         if (fEventIdx) {
1762             vring_set_avail_event(vq, vq->last_avail_idx);
1763         }
1764         /* immediately push the element, nothing to unmap
1765          * as both in_num and out_num are set to 0 */
1766         virtqueue_push(vq, &elem, 0);
1767         dropped++;
1768     }
1769 
1770     return dropped;
1771 }
1772 
1773 /* virtqueue_drop_all:
1774  * @vq: The #VirtQueue
1775  * Drops all queued buffers and indicates them to the guest
1776  * as if they are done. Useful when buffers can not be
1777  * processed but must be returned to the guest.
1778  */
1779 unsigned int virtqueue_drop_all(VirtQueue *vq)
1780 {
1781     struct VirtIODevice *vdev = vq->vdev;
1782 
1783     if (virtio_device_disabled(vq->vdev)) {
1784         return 0;
1785     }
1786 
1787     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1788         return virtqueue_packed_drop_all(vq);
1789     } else {
1790         return virtqueue_split_drop_all(vq);
1791     }
1792 }
1793 
1794 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1795  * it is what QEMU has always done by mistake.  We can change it sooner
1796  * or later by bumping the version number of the affected vm states.
1797  * In the meanwhile, since the in-memory layout of VirtQueueElement
1798  * has changed, we need to marshal to and from the layout that was
1799  * used before the change.
1800  */
1801 typedef struct VirtQueueElementOld {
1802     unsigned int index;
1803     unsigned int out_num;
1804     unsigned int in_num;
1805     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1806     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1807     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1808     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1809 } VirtQueueElementOld;
1810 
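/*
 * Load a VirtQueueElement from the migration stream.  The element is read
 * in the legacy VirtQueueElementOld layout and the scatter/gather entries
 * are re-mapped into this QEMU's address space via virtqueue_map().
 */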
1811 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1812 {
1813     VirtQueueElement *elem;
1814     VirtQueueElementOld data;
1815     int i;
1816 
1817     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1818 
1819     /* TODO: teach all callers that this can fail, and return failure instead
1820      * of asserting here.
1821      * This is just one thing (there are probably more) that must be
1822      * fixed before we can allow NDEBUG compilation.
1823      */
1824     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1825     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1826 
1827     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1828     elem->index = data.index;
1829 
1830     for (i = 0; i < elem->in_num; i++) {
1831         elem->in_addr[i] = data.in_addr[i];
1832     }
1833 
1834     for (i = 0; i < elem->out_num; i++) {
1835         elem->out_addr[i] = data.out_addr[i];
1836     }
1837 
1838     for (i = 0; i < elem->in_num; i++) {
1839         /* Base is overwritten by virtqueue_map.  */
1840         elem->in_sg[i].iov_base = 0;
1841         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1842     }
1843 
1844     for (i = 0; i < elem->out_num; i++) {
1845         /* Base is overwritten by virtqueue_map.  */
1846         elem->out_sg[i].iov_base = 0;
1847         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1848     }
1849 
1850     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1851         qemu_get_be32s(f, &elem->ndescs);
1852     }
1853 
1854     virtqueue_map(vdev, elem);
1855     return elem;
1856 }
1857 
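/*
 * Save a VirtQueueElement to the migration stream in the legacy
 * VirtQueueElementOld layout.  Only the iovec lengths are saved; the
 * iov_base pointers are re-created by virtqueue_map() on load.
 */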
1858 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1859                                 VirtQueueElement *elem)
1860 {
1861     VirtQueueElementOld data;
1862     int i;
1863 
1864     memset(&data, 0, sizeof(data));
1865     data.index = elem->index;
1866     data.in_num = elem->in_num;
1867     data.out_num = elem->out_num;
1868 
1869     for (i = 0; i < elem->in_num; i++) {
1870         data.in_addr[i] = elem->in_addr[i];
1871     }
1872 
1873     for (i = 0; i < elem->out_num; i++) {
1874         data.out_addr[i] = elem->out_addr[i];
1875     }
1876 
1877     for (i = 0; i < elem->in_num; i++) {
1878         /* Base is overwritten by virtqueue_map when loading.  Do not
1879          * save it, as it would leak the QEMU address space layout.  */
1880         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1881     }
1882 
1883     for (i = 0; i < elem->out_num; i++) {
1884         /* Do not save iov_base as above.  */
1885         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1886     }
1887 
1888     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1889         qemu_put_be32s(f, &elem->ndescs);
1890     }
1891 
1892     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1893 }
1894 
1895 /* virtio device */
1896 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1897 {
1898     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1899     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1900 
1901     if (virtio_device_disabled(vdev)) {
1902         return;
1903     }
1904 
1905     if (k->notify) {
1906         k->notify(qbus->parent, vector);
1907     }
1908 }
1909 
1910 void virtio_update_irq(VirtIODevice *vdev)
1911 {
1912     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1913 }
1914 
1915 static int virtio_validate_features(VirtIODevice *vdev)
1916 {
1917     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1918 
1919     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1920         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1921         return -EFAULT;
1922     }
1923 
1924     if (k->validate_features) {
1925         return k->validate_features(vdev);
1926     } else {
1927         return 0;
1928     }
1929 }
1930 
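/*
 * Update the device status byte.  For VIRTIO 1.0 devices, setting
 * FEATURES_OK first runs feature validation and the status write is
 * refused (a non-zero value is returned) if validation fails.
 */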
1931 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1932 {
1933     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1934     trace_virtio_set_status(vdev, val);
1935 
1936     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1937         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1938             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1939             int ret = virtio_validate_features(vdev);
1940 
1941             if (ret) {
1942                 return ret;
1943             }
1944         }
1945     }
1946 
1947     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1948         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1949         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1950     }
1951 
1952     if (k->set_status) {
1953         k->set_status(vdev, val);
1954     }
1955     vdev->status = val;
1956 
1957     return 0;
1958 }
1959 
1960 static enum virtio_device_endian virtio_default_endian(void)
1961 {
1962     if (target_words_bigendian()) {
1963         return VIRTIO_DEVICE_ENDIAN_BIG;
1964     } else {
1965         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1966     }
1967 }
1968 
1969 static enum virtio_device_endian virtio_current_cpu_endian(void)
1970 {
1971     if (cpu_virtio_is_big_endian(current_cpu)) {
1972         return VIRTIO_DEVICE_ENDIAN_BIG;
1973     } else {
1974         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1975     }
1976 }
1977 
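/*
 * Full device reset: clears status, features and all per-queue state, and
 * re-derives the device endianness (a guest-initiated reset uses the
 * current CPU endianness, a system reset falls back to the target
 * default).
 */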
1978 void virtio_reset(void *opaque)
1979 {
1980     VirtIODevice *vdev = opaque;
1981     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1982     int i;
1983 
1984     virtio_set_status(vdev, 0);
1985     if (current_cpu) {
1986         /* Guest initiated reset */
1987         vdev->device_endian = virtio_current_cpu_endian();
1988     } else {
1989         /* System reset */
1990         vdev->device_endian = virtio_default_endian();
1991     }
1992 
1993     if (k->reset) {
1994         k->reset(vdev);
1995     }
1996 
1997     vdev->start_on_kick = false;
1998     vdev->started = false;
1999     vdev->broken = false;
2000     vdev->guest_features = 0;
2001     vdev->queue_sel = 0;
2002     vdev->status = 0;
2003     vdev->disabled = false;
2004     qatomic_set(&vdev->isr, 0);
2005     vdev->config_vector = VIRTIO_NO_VECTOR;
2006     virtio_notify_vector(vdev, vdev->config_vector);
2007 
2008     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2009         vdev->vq[i].vring.desc = 0;
2010         vdev->vq[i].vring.avail = 0;
2011         vdev->vq[i].vring.used = 0;
2012         vdev->vq[i].last_avail_idx = 0;
2013         vdev->vq[i].shadow_avail_idx = 0;
2014         vdev->vq[i].used_idx = 0;
2015         vdev->vq[i].last_avail_wrap_counter = true;
2016         vdev->vq[i].shadow_avail_wrap_counter = true;
2017         vdev->vq[i].used_wrap_counter = true;
2018         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2019         vdev->vq[i].signalled_used = 0;
2020         vdev->vq[i].signalled_used_valid = false;
2021         vdev->vq[i].notification = true;
2022         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2023         vdev->vq[i].inuse = 0;
2024         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2025     }
2026 }
2027 
2028 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2029 {
2030     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2031     uint8_t val;
2032 
2033     if (addr + sizeof(val) > vdev->config_len) {
2034         return (uint32_t)-1;
2035     }
2036 
2037     k->get_config(vdev, vdev->config);
2038 
2039     val = ldub_p(vdev->config + addr);
2040     return val;
2041 }
2042 
2043 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2044 {
2045     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2046     uint16_t val;
2047 
2048     if (addr + sizeof(val) > vdev->config_len) {
2049         return (uint32_t)-1;
2050     }
2051 
2052     k->get_config(vdev, vdev->config);
2053 
2054     val = lduw_p(vdev->config + addr);
2055     return val;
2056 }
2057 
2058 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2059 {
2060     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2061     uint32_t val;
2062 
2063     if (addr + sizeof(val) > vdev->config_len) {
2064         return (uint32_t)-1;
2065     }
2066 
2067     k->get_config(vdev, vdev->config);
2068 
2069     val = ldl_p(vdev->config + addr);
2070     return val;
2071 }
2072 
2073 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2074 {
2075     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2076     uint8_t val = data;
2077 
2078     if (addr + sizeof(val) > vdev->config_len) {
2079         return;
2080     }
2081 
2082     stb_p(vdev->config + addr, val);
2083 
2084     if (k->set_config) {
2085         k->set_config(vdev, vdev->config);
2086     }
2087 }
2088 
2089 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2090 {
2091     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2092     uint16_t val = data;
2093 
2094     if (addr + sizeof(val) > vdev->config_len) {
2095         return;
2096     }
2097 
2098     stw_p(vdev->config + addr, val);
2099 
2100     if (k->set_config) {
2101         k->set_config(vdev, vdev->config);
2102     }
2103 }
2104 
2105 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2106 {
2107     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2108     uint32_t val = data;
2109 
2110     if (addr + sizeof(val) > vdev->config_len) {
2111         return;
2112     }
2113 
2114     stl_p(vdev->config + addr, val);
2115 
2116     if (k->set_config) {
2117         k->set_config(vdev, vdev->config);
2118     }
2119 }
2120 
2121 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2122 {
2123     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2124     uint8_t val;
2125 
2126     if (addr + sizeof(val) > vdev->config_len) {
2127         return (uint32_t)-1;
2128     }
2129 
2130     k->get_config(vdev, vdev->config);
2131 
2132     val = ldub_p(vdev->config + addr);
2133     return val;
2134 }
2135 
2136 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2137 {
2138     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2139     uint16_t val;
2140 
2141     if (addr + sizeof(val) > vdev->config_len) {
2142         return (uint32_t)-1;
2143     }
2144 
2145     k->get_config(vdev, vdev->config);
2146 
2147     val = lduw_le_p(vdev->config + addr);
2148     return val;
2149 }
2150 
2151 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2152 {
2153     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2154     uint32_t val;
2155 
2156     if (addr + sizeof(val) > vdev->config_len) {
2157         return (uint32_t)-1;
2158     }
2159 
2160     k->get_config(vdev, vdev->config);
2161 
2162     val = ldl_le_p(vdev->config + addr);
2163     return val;
2164 }
2165 
2166 void virtio_config_modern_writeb(VirtIODevice *vdev,
2167                                  uint32_t addr, uint32_t data)
2168 {
2169     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2170     uint8_t val = data;
2171 
2172     if (addr + sizeof(val) > vdev->config_len) {
2173         return;
2174     }
2175 
2176     stb_p(vdev->config + addr, val);
2177 
2178     if (k->set_config) {
2179         k->set_config(vdev, vdev->config);
2180     }
2181 }
2182 
2183 void virtio_config_modern_writew(VirtIODevice *vdev,
2184                                  uint32_t addr, uint32_t data)
2185 {
2186     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2187     uint16_t val = data;
2188 
2189     if (addr + sizeof(val) > vdev->config_len) {
2190         return;
2191     }
2192 
2193     stw_le_p(vdev->config + addr, val);
2194 
2195     if (k->set_config) {
2196         k->set_config(vdev, vdev->config);
2197     }
2198 }
2199 
2200 void virtio_config_modern_writel(VirtIODevice *vdev,
2201                                  uint32_t addr, uint32_t data)
2202 {
2203     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2204     uint32_t val = data;
2205 
2206     if (addr + sizeof(val) > vdev->config_len) {
2207         return;
2208     }
2209 
2210     stl_le_p(vdev->config + addr, val);
2211 
2212     if (k->set_config) {
2213         k->set_config(vdev, vdev->config);
2214     }
2215 }
2216 
2217 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2218 {
2219     if (!vdev->vq[n].vring.num) {
2220         return;
2221     }
2222     vdev->vq[n].vring.desc = addr;
2223     virtio_queue_update_rings(vdev, n);
2224 }
2225 
2226 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2227 {
2228     return vdev->vq[n].vring.desc;
2229 }
2230 
2231 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2232                             hwaddr avail, hwaddr used)
2233 {
2234     if (!vdev->vq[n].vring.num) {
2235         return;
2236     }
2237     vdev->vq[n].vring.desc = desc;
2238     vdev->vq[n].vring.avail = avail;
2239     vdev->vq[n].vring.used = used;
2240     virtio_init_region_cache(vdev, n);
2241 }
2242 
2243 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2244 {
2245     /* Don't allow guest to flip queue between existent and
2246      * nonexistent states, or to set it to an invalid size.
2247      */
2248     if (!!num != !!vdev->vq[n].vring.num ||
2249         num > VIRTQUEUE_MAX_SIZE ||
2250         num < 0) {
2251         return;
2252     }
2253     vdev->vq[n].vring.num = num;
2254 }
2255 
2256 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2257 {
2258     return QLIST_FIRST(&vdev->vector_queues[vector]);
2259 }
2260 
2261 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2262 {
2263     return QLIST_NEXT(vq, node);
2264 }
2265 
2266 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2267 {
2268     return vdev->vq[n].vring.num;
2269 }
2270 
2271 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2272 {
2273     return vdev->vq[n].vring.num_default;
2274 }
2275 
2276 int virtio_get_num_queues(VirtIODevice *vdev)
2277 {
2278     int i;
2279 
2280     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2281         if (!virtio_queue_get_num(vdev, i)) {
2282             break;
2283         }
2284     }
2285 
2286     return i;
2287 }
2288 
2289 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2290 {
2291     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2292     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2293 
2294     /* virtio-1 compliant devices cannot change the alignment */
2295     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2296         error_report("tried to modify queue alignment for virtio-1 device");
2297         return;
2298     }
2299     /* Check that the transport told us it was going to do this
2300      * (so a buggy transport will immediately assert rather than
2301      * silently failing to migrate this state)
2302      */
2303     assert(k->has_variable_vring_alignment);
2304 
2305     if (align) {
2306         vdev->vq[n].vring.align = align;
2307         virtio_queue_update_rings(vdev, n);
2308     }
2309 }
2310 
2311 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2312 {
2313     bool ret = false;
2314 
2315     if (vq->vring.desc && vq->handle_aio_output) {
2316         VirtIODevice *vdev = vq->vdev;
2317 
2318         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2319         ret = vq->handle_aio_output(vdev, vq);
2320 
2321         if (unlikely(vdev->start_on_kick)) {
2322             virtio_set_started(vdev, true);
2323         }
2324     }
2325 
2326     return ret;
2327 }
2328 
2329 static void virtio_queue_notify_vq(VirtQueue *vq)
2330 {
2331     if (vq->vring.desc && vq->handle_output) {
2332         VirtIODevice *vdev = vq->vdev;
2333 
2334         if (unlikely(vdev->broken)) {
2335             return;
2336         }
2337 
2338         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2339         vq->handle_output(vdev, vq);
2340 
2341         if (unlikely(vdev->start_on_kick)) {
2342             virtio_set_started(vdev, true);
2343         }
2344     }
2345 }
2346 
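/*
 * Entry point for guest "kick" notifications arriving via the transport.
 * If an ioeventfd-backed host notifier is enabled, the kick is forwarded
 * to it; otherwise the queue's output handler is invoked directly.
 */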
2347 void virtio_queue_notify(VirtIODevice *vdev, int n)
2348 {
2349     VirtQueue *vq = &vdev->vq[n];
2350 
2351     if (unlikely(!vq->vring.desc || vdev->broken)) {
2352         return;
2353     }
2354 
2355     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2356     if (vq->host_notifier_enabled) {
2357         event_notifier_set(&vq->host_notifier);
2358     } else if (vq->handle_output) {
2359         vq->handle_output(vdev, vq);
2360 
2361         if (unlikely(vdev->start_on_kick)) {
2362             virtio_set_started(vdev, true);
2363         }
2364     }
2365 }
2366 
2367 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2368 {
2369     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2370         VIRTIO_NO_VECTOR;
2371 }
2372 
2373 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2374 {
2375     VirtQueue *vq = &vdev->vq[n];
2376 
2377     if (n < VIRTIO_QUEUE_MAX) {
2378         if (vdev->vector_queues &&
2379             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2380             QLIST_REMOVE(vq, node);
2381         }
2382         vdev->vq[n].vector = vector;
2383         if (vdev->vector_queues &&
2384             vector != VIRTIO_NO_VECTOR) {
2385             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2386         }
2387     }
2388 }
2389 
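/*
 * Claim the first unused queue slot and initialize it with the given size
 * and output handler.  Aborts if all VIRTIO_QUEUE_MAX slots are in use or
 * queue_size exceeds VIRTQUEUE_MAX_SIZE.  A device realize function would
 * typically call something like (illustrative names only):
 *
 *     vdev->rx_vq = virtio_add_queue(vdev, 256, handle_rx);
 */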
2390 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2391                             VirtIOHandleOutput handle_output)
2392 {
2393     int i;
2394 
2395     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2396         if (vdev->vq[i].vring.num == 0)
2397             break;
2398     }
2399 
2400     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2401         abort();
2402 
2403     vdev->vq[i].vring.num = queue_size;
2404     vdev->vq[i].vring.num_default = queue_size;
2405     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2406     vdev->vq[i].handle_output = handle_output;
2407     vdev->vq[i].handle_aio_output = NULL;
2408     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2409                                        queue_size);
2410 
2411     return &vdev->vq[i];
2412 }
2413 
2414 void virtio_delete_queue(VirtQueue *vq)
2415 {
2416     vq->vring.num = 0;
2417     vq->vring.num_default = 0;
2418     vq->handle_output = NULL;
2419     vq->handle_aio_output = NULL;
2420     g_free(vq->used_elems);
2421     vq->used_elems = NULL;
2422     virtio_virtqueue_reset_region_cache(vq);
2423 }
2424 
2425 void virtio_del_queue(VirtIODevice *vdev, int n)
2426 {
2427     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2428         abort();
2429     }
2430 
2431     virtio_delete_queue(&vdev->vq[n]);
2432 }
2433 
2434 static void virtio_set_isr(VirtIODevice *vdev, int value)
2435 {
2436     uint8_t old = qatomic_read(&vdev->isr);
2437 
2438     /* Do not write ISR if it does not change, so that its cacheline remains
2439      * shared in the common case where the guest does not read it.
2440      */
2441     if ((old & value) != value) {
2442         qatomic_or(&vdev->isr, value);
2443     }
2444 }
2445 
2446 /* Called within rcu_read_lock(). */
2447 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2448 {
2449     uint16_t old, new;
2450     bool v;
2451     /* We need to expose used array entries before checking used event. */
2452     smp_mb();
2453     /* Always notify when the queue is empty (if the feature was acknowledged) */
2454     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2455         !vq->inuse && virtio_queue_empty(vq)) {
2456         return true;
2457     }
2458 
2459     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2460         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2461     }
2462 
2463     v = vq->signalled_used_valid;
2464     vq->signalled_used_valid = true;
2465     old = vq->signalled_used;
2466     new = vq->signalled_used = vq->used_idx;
2467     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2468 }
2469 
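/*
 * Packed-ring variant of vring_need_event(): the event offset carries the
 * wrap counter in bit 15, so when the producer's wrap state differs from
 * the event's, the offset is shifted down by the ring size before the
 * comparison.
 */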
2470 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2471                                     uint16_t off_wrap, uint16_t new,
2472                                     uint16_t old)
2473 {
2474     int off = off_wrap & ~(1 << 15);
2475 
2476     if (wrap != off_wrap >> 15) {
2477         off -= vq->vring.num;
2478     }
2479 
2480     return vring_need_event(off, new, old);
2481 }
2482 
2483 /* Called within rcu_read_lock(). */
2484 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2485 {
2486     VRingPackedDescEvent e;
2487     uint16_t old, new;
2488     bool v;
2489     VRingMemoryRegionCaches *caches;
2490 
2491     caches = vring_get_region_caches(vq);
2492     if (!caches) {
2493         return false;
2494     }
2495 
2496     vring_packed_event_read(vdev, &caches->avail, &e);
2497 
2498     old = vq->signalled_used;
2499     new = vq->signalled_used = vq->used_idx;
2500     v = vq->signalled_used_valid;
2501     vq->signalled_used_valid = true;
2502 
2503     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2504         return false;
2505     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2506         return true;
2507     }
2508 
2509     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2510                                          e.off_wrap, new, old);
2511 }
2512 
2513 /* Called within rcu_read_lock().  */
2514 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2515 {
2516     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2517         return virtio_packed_should_notify(vdev, vq);
2518     } else {
2519         return virtio_split_should_notify(vdev, vq);
2520     }
2521 }
2522 
2523 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2524 {
2525     WITH_RCU_READ_LOCK_GUARD() {
2526         if (!virtio_should_notify(vdev, vq)) {
2527             return;
2528         }
2529     }
2530 
2531     trace_virtio_notify_irqfd(vdev, vq);
2532 
2533     /*
2534      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2535      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2536      * incorrectly polling this bit during crashdump and hibernation
2537      * in MSI mode, causing a hang if this bit is never updated.
2538      * Recent releases of Windows do not really shut down, but rather
2539      * log out and hibernate to make the next startup faster.  Hence,
2540      * this manifested as a more serious hang during shutdown with those drivers.
2541      *
2542      * The next driver release, from 2016, fixed this problem, so working
2543      * around it is not a must, but it's easy to do, so let's do it here.
2544      *
2545      * Note: it's safe to update ISR from any thread as it was switched
2546      * to an atomic operation.
2547      */
2548     virtio_set_isr(vq->vdev, 0x1);
2549     event_notifier_set(&vq->guest_notifier);
2550 }
2551 
2552 static void virtio_irq(VirtQueue *vq)
2553 {
2554     virtio_set_isr(vq->vdev, 0x1);
2555     virtio_notify_vector(vq->vdev, vq->vector);
2556 }
2557 
2558 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2559 {
2560     WITH_RCU_READ_LOCK_GUARD() {
2561         if (!virtio_should_notify(vdev, vq)) {
2562             return;
2563         }
2564     }
2565 
2566     trace_virtio_notify(vdev, vq);
2567     virtio_irq(vq);
2568 }
2569 
2570 void virtio_notify_config(VirtIODevice *vdev)
2571 {
2572     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2573         return;
2574 
2575     virtio_set_isr(vdev, 0x3);
2576     vdev->generation++;
2577     virtio_notify_vector(vdev, vdev->config_vector);
2578 }
2579 
2580 static bool virtio_device_endian_needed(void *opaque)
2581 {
2582     VirtIODevice *vdev = opaque;
2583 
2584     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2585     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2586         return vdev->device_endian != virtio_default_endian();
2587     }
2588     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2589     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2590 }
2591 
2592 static bool virtio_64bit_features_needed(void *opaque)
2593 {
2594     VirtIODevice *vdev = opaque;
2595 
2596     return (vdev->host_features >> 32) != 0;
2597 }
2598 
2599 static bool virtio_virtqueue_needed(void *opaque)
2600 {
2601     VirtIODevice *vdev = opaque;
2602 
2603     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2604 }
2605 
2606 static bool virtio_packed_virtqueue_needed(void *opaque)
2607 {
2608     VirtIODevice *vdev = opaque;
2609 
2610     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2611 }
2612 
2613 static bool virtio_ringsize_needed(void *opaque)
2614 {
2615     VirtIODevice *vdev = opaque;
2616     int i;
2617 
2618     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2619         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2620             return true;
2621         }
2622     }
2623     return false;
2624 }
2625 
2626 static bool virtio_extra_state_needed(void *opaque)
2627 {
2628     VirtIODevice *vdev = opaque;
2629     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2630     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2631 
2632     return k->has_extra_state &&
2633         k->has_extra_state(qbus->parent);
2634 }
2635 
2636 static bool virtio_broken_needed(void *opaque)
2637 {
2638     VirtIODevice *vdev = opaque;
2639 
2640     return vdev->broken;
2641 }
2642 
2643 static bool virtio_started_needed(void *opaque)
2644 {
2645     VirtIODevice *vdev = opaque;
2646 
2647     return vdev->started;
2648 }
2649 
2650 static bool virtio_disabled_needed(void *opaque)
2651 {
2652     VirtIODevice *vdev = opaque;
2653 
2654     return vdev->disabled;
2655 }
2656 
2657 static const VMStateDescription vmstate_virtqueue = {
2658     .name = "virtqueue_state",
2659     .version_id = 1,
2660     .minimum_version_id = 1,
2661     .fields = (VMStateField[]) {
2662         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2663         VMSTATE_UINT64(vring.used, struct VirtQueue),
2664         VMSTATE_END_OF_LIST()
2665     }
2666 };
2667 
2668 static const VMStateDescription vmstate_packed_virtqueue = {
2669     .name = "packed_virtqueue_state",
2670     .version_id = 1,
2671     .minimum_version_id = 1,
2672     .fields = (VMStateField[]) {
2673         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2674         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2675         VMSTATE_UINT16(used_idx, struct VirtQueue),
2676         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2677         VMSTATE_UINT32(inuse, struct VirtQueue),
2678         VMSTATE_END_OF_LIST()
2679     }
2680 };
2681 
2682 static const VMStateDescription vmstate_virtio_virtqueues = {
2683     .name = "virtio/virtqueues",
2684     .version_id = 1,
2685     .minimum_version_id = 1,
2686     .needed = &virtio_virtqueue_needed,
2687     .fields = (VMStateField[]) {
2688         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2689                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2690         VMSTATE_END_OF_LIST()
2691     }
2692 };
2693 
2694 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2695     .name = "virtio/packed_virtqueues",
2696     .version_id = 1,
2697     .minimum_version_id = 1,
2698     .needed = &virtio_packed_virtqueue_needed,
2699     .fields = (VMStateField[]) {
2700         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2701                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2702         VMSTATE_END_OF_LIST()
2703     }
2704 };
2705 
2706 static const VMStateDescription vmstate_ringsize = {
2707     .name = "ringsize_state",
2708     .version_id = 1,
2709     .minimum_version_id = 1,
2710     .fields = (VMStateField[]) {
2711         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2712         VMSTATE_END_OF_LIST()
2713     }
2714 };
2715 
2716 static const VMStateDescription vmstate_virtio_ringsize = {
2717     .name = "virtio/ringsize",
2718     .version_id = 1,
2719     .minimum_version_id = 1,
2720     .needed = &virtio_ringsize_needed,
2721     .fields = (VMStateField[]) {
2722         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2723                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2724         VMSTATE_END_OF_LIST()
2725     }
2726 };
2727 
2728 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2729                            const VMStateField *field)
2730 {
2731     VirtIODevice *vdev = pv;
2732     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2733     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2734 
2735     if (!k->load_extra_state) {
2736         return -1;
2737     } else {
2738         return k->load_extra_state(qbus->parent, f);
2739     }
2740 }
2741 
2742 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2743                            const VMStateField *field, JSONWriter *vmdesc)
2744 {
2745     VirtIODevice *vdev = pv;
2746     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2747     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2748 
2749     k->save_extra_state(qbus->parent, f);
2750     return 0;
2751 }
2752 
2753 static const VMStateInfo vmstate_info_extra_state = {
2754     .name = "virtqueue_extra_state",
2755     .get = get_extra_state,
2756     .put = put_extra_state,
2757 };
2758 
2759 static const VMStateDescription vmstate_virtio_extra_state = {
2760     .name = "virtio/extra_state",
2761     .version_id = 1,
2762     .minimum_version_id = 1,
2763     .needed = &virtio_extra_state_needed,
2764     .fields = (VMStateField[]) {
2765         {
2766             .name         = "extra_state",
2767             .version_id   = 0,
2768             .field_exists = NULL,
2769             .size         = 0,
2770             .info         = &vmstate_info_extra_state,
2771             .flags        = VMS_SINGLE,
2772             .offset       = 0,
2773         },
2774         VMSTATE_END_OF_LIST()
2775     }
2776 };
2777 
2778 static const VMStateDescription vmstate_virtio_device_endian = {
2779     .name = "virtio/device_endian",
2780     .version_id = 1,
2781     .minimum_version_id = 1,
2782     .needed = &virtio_device_endian_needed,
2783     .fields = (VMStateField[]) {
2784         VMSTATE_UINT8(device_endian, VirtIODevice),
2785         VMSTATE_END_OF_LIST()
2786     }
2787 };
2788 
2789 static const VMStateDescription vmstate_virtio_64bit_features = {
2790     .name = "virtio/64bit_features",
2791     .version_id = 1,
2792     .minimum_version_id = 1,
2793     .needed = &virtio_64bit_features_needed,
2794     .fields = (VMStateField[]) {
2795         VMSTATE_UINT64(guest_features, VirtIODevice),
2796         VMSTATE_END_OF_LIST()
2797     }
2798 };
2799 
2800 static const VMStateDescription vmstate_virtio_broken = {
2801     .name = "virtio/broken",
2802     .version_id = 1,
2803     .minimum_version_id = 1,
2804     .needed = &virtio_broken_needed,
2805     .fields = (VMStateField[]) {
2806         VMSTATE_BOOL(broken, VirtIODevice),
2807         VMSTATE_END_OF_LIST()
2808     }
2809 };
2810 
2811 static const VMStateDescription vmstate_virtio_started = {
2812     .name = "virtio/started",
2813     .version_id = 1,
2814     .minimum_version_id = 1,
2815     .needed = &virtio_started_needed,
2816     .fields = (VMStateField[]) {
2817         VMSTATE_BOOL(started, VirtIODevice),
2818         VMSTATE_END_OF_LIST()
2819     }
2820 };
2821 
2822 static const VMStateDescription vmstate_virtio_disabled = {
2823     .name = "virtio/disabled",
2824     .version_id = 1,
2825     .minimum_version_id = 1,
2826     .needed = &virtio_disabled_needed,
2827     .fields = (VMStateField[]) {
2828         VMSTATE_BOOL(disabled, VirtIODevice),
2829         VMSTATE_END_OF_LIST()
2830     }
2831 };
2832 
2833 static const VMStateDescription vmstate_virtio = {
2834     .name = "virtio",
2835     .version_id = 1,
2836     .minimum_version_id = 1,
2837     .minimum_version_id_old = 1,
2838     .fields = (VMStateField[]) {
2839         VMSTATE_END_OF_LIST()
2840     },
2841     .subsections = (const VMStateDescription*[]) {
2842         &vmstate_virtio_device_endian,
2843         &vmstate_virtio_64bit_features,
2844         &vmstate_virtio_virtqueues,
2845         &vmstate_virtio_ringsize,
2846         &vmstate_virtio_broken,
2847         &vmstate_virtio_extra_state,
2848         &vmstate_virtio_started,
2849         &vmstate_virtio_packed_virtqueues,
2850         &vmstate_virtio_disabled,
2851         NULL
2852     }
2853 };
2854 
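/*
 * Save the common virtio device state: transport config, status, ISR and
 * queue selector, the low 32 feature bits, the config space, per-queue
 * state, then device-specific state and finally the vmstate subsections.
 */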
2855 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2856 {
2857     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2858     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2859     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2860     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2861     int i;
2862 
2863     if (k->save_config) {
2864         k->save_config(qbus->parent, f);
2865     }
2866 
2867     qemu_put_8s(f, &vdev->status);
2868     qemu_put_8s(f, &vdev->isr);
2869     qemu_put_be16s(f, &vdev->queue_sel);
2870     qemu_put_be32s(f, &guest_features_lo);
2871     qemu_put_be32(f, vdev->config_len);
2872     qemu_put_buffer(f, vdev->config, vdev->config_len);
2873 
2874     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2875         if (vdev->vq[i].vring.num == 0)
2876             break;
2877     }
2878 
2879     qemu_put_be32(f, i);
2880 
2881     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2882         if (vdev->vq[i].vring.num == 0)
2883             break;
2884 
2885         qemu_put_be32(f, vdev->vq[i].vring.num);
2886         if (k->has_variable_vring_alignment) {
2887             qemu_put_be32(f, vdev->vq[i].vring.align);
2888         }
2889         /*
2890          * Save desc now, the rest of the ring addresses are saved in
2891          * subsections for VIRTIO-1 devices.
2892          */
2893         qemu_put_be64(f, vdev->vq[i].vring.desc);
2894         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2895         if (k->save_queue) {
2896             k->save_queue(qbus->parent, i, f);
2897         }
2898     }
2899 
2900     if (vdc->save != NULL) {
2901         vdc->save(vdev, f);
2902     }
2903 
2904     if (vdc->vmsd) {
2905         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2906         if (ret) {
2907             return ret;
2908         }
2909     }
2910 
2911     /* Subsections */
2912     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2913 }
2914 
2915 /* A wrapper for use as a VMState .put function */
2916 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2917                               const VMStateField *field, JSONWriter *vmdesc)
2918 {
2919     return virtio_save(VIRTIO_DEVICE(opaque), f);
2920 }
2921 
2922 /* A wrapper for use as a VMState .get function */
2923 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2924                              const VMStateField *field)
2925 {
2926     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2927     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2928 
2929     return virtio_load(vdev, f, dc->vmsd->version_id);
2930 }
2931 
2932 const VMStateInfo  virtio_vmstate_info = {
2933     .name = "virtio",
2934     .get = virtio_device_get,
2935     .put = virtio_device_put,
2936 };
2937 
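/*
 * Apply a guest feature set without checking the device status.  Bits not
 * offered in host_features are masked off before being applied; -1 is
 * returned if the guest attempted to set any such bit.
 */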
2938 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2939 {
2940     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2941     bool bad = (val & ~(vdev->host_features)) != 0;
2942 
2943     val &= vdev->host_features;
2944     if (k->set_features) {
2945         k->set_features(vdev, val);
2946     }
2947     vdev->guest_features = val;
2948     return bad ? -1 : 0;
2949 }
2950 
2951 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2952 {
2953     int ret;
2954     /*
2955      * The driver must not attempt to set features after feature negotiation
2956      * has finished.
2957      */
2958     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2959         return -EINVAL;
2960     }
2961     ret = virtio_set_features_nocheck(vdev, val);
2962     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2963         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2964         int i;
2965         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2966             if (vdev->vq[i].vring.num != 0) {
2967                 virtio_init_region_cache(vdev, i);
2968             }
2969         }
2970     }
2971     if (!ret) {
2972         if (!virtio_device_started(vdev, vdev->status) &&
2973             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2974             vdev->start_on_kick = true;
2975         }
2976     }
2977     return ret;
2978 }
2979 
2980 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2981                                       uint64_t host_features)
2982 {
2983     size_t config_size = 0;
2984     int i;
2985 
2986     for (i = 0; feature_sizes[i].flags != 0; i++) {
2987         if (host_features & feature_sizes[i].flags) {
2988             config_size = MAX(feature_sizes[i].end, config_size);
2989         }
2990     }
2991 
2992     return config_size;
2993 }
2994 
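/*
 * Counterpart of virtio_save().  After the vmstate subsections are loaded,
 * the negotiated features are re-checked against host_features and the
 * per-queue indices are sanity-checked against the rings in guest memory.
 */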
2995 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2996 {
2997     int i, ret;
2998     int32_t config_len;
2999     uint32_t num;
3000     uint32_t features;
3001     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3002     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3003     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3004 
3005     /*
3006      * We poison the endianness to ensure it does not get used before
3007      * subsections have been loaded.
3008      */
3009     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3010 
3011     if (k->load_config) {
3012         ret = k->load_config(qbus->parent, f);
3013         if (ret)
3014             return ret;
3015     }
3016 
3017     qemu_get_8s(f, &vdev->status);
3018     qemu_get_8s(f, &vdev->isr);
3019     qemu_get_be16s(f, &vdev->queue_sel);
3020     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3021         return -1;
3022     }
3023     qemu_get_be32s(f, &features);
3024 
3025     /*
3026      * Temporarily set guest_features low bits - needed by
3027      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3028      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3029      *
3030      * Note: devices should always test host features in future - don't create
3031      * new dependencies like this.
3032      */
3033     vdev->guest_features = features;
3034 
3035     config_len = qemu_get_be32(f);
3036 
3037     /*
3038      * There are cases where the incoming config can be bigger or smaller
3039      * than what we have; so load what we have space for, and skip
3040      * any excess that's in the stream.
3041      */
3042     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3043 
3044     while (config_len > vdev->config_len) {
3045         qemu_get_byte(f);
3046         config_len--;
3047     }
3048 
3049     num = qemu_get_be32(f);
3050 
3051     if (num > VIRTIO_QUEUE_MAX) {
3052         error_report("Invalid number of virtqueues: 0x%x", num);
3053         return -1;
3054     }
3055 
3056     for (i = 0; i < num; i++) {
3057         vdev->vq[i].vring.num = qemu_get_be32(f);
3058         if (k->has_variable_vring_alignment) {
3059             vdev->vq[i].vring.align = qemu_get_be32(f);
3060         }
3061         vdev->vq[i].vring.desc = qemu_get_be64(f);
3062         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3063         vdev->vq[i].signalled_used_valid = false;
3064         vdev->vq[i].notification = true;
3065 
3066         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3067             error_report("VQ %d address 0x0 "
3068                          "inconsistent with Host index 0x%x",
3069                          i, vdev->vq[i].last_avail_idx);
3070             return -1;
3071         }
3072         if (k->load_queue) {
3073             ret = k->load_queue(qbus->parent, i, f);
3074             if (ret)
3075                 return ret;
3076         }
3077     }
3078 
3079     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3080 
3081     if (vdc->load != NULL) {
3082         ret = vdc->load(vdev, f, version_id);
3083         if (ret) {
3084             return ret;
3085         }
3086     }
3087 
3088     if (vdc->vmsd) {
3089         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3090         if (ret) {
3091             return ret;
3092         }
3093     }
3094 
3095     /* Subsections */
3096     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3097     if (ret) {
3098         return ret;
3099     }
3100 
3101     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3102         vdev->device_endian = virtio_default_endian();
3103     }
3104 
3105     if (virtio_64bit_features_needed(vdev)) {
3106         /*
3107          * Subsection load filled vdev->guest_features.  Run them
3108          * through virtio_set_features_nocheck() to sanity-check them
3109          * against host_features.
3110          */
3111         uint64_t features64 = vdev->guest_features;
3112         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3113             error_report("Features 0x%" PRIx64 " unsupported. "
3114                          "Allowed features: 0x%" PRIx64,
3115                          features64, vdev->host_features);
3116             return -1;
3117         }
3118     } else {
3119         if (virtio_set_features_nocheck(vdev, features) < 0) {
3120             error_report("Features 0x%x unsupported. "
3121                          "Allowed features: 0x%" PRIx64,
3122                          features, vdev->host_features);
3123             return -1;
3124         }
3125     }
3126 
3127     if (!virtio_device_started(vdev, vdev->status) &&
3128         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3129         vdev->start_on_kick = true;
3130     }
3131 
3132     RCU_READ_LOCK_GUARD();
3133     for (i = 0; i < num; i++) {
3134         if (vdev->vq[i].vring.desc) {
3135             uint16_t nheads;
3136 
3137             /*
3138              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3139              * only the region cache needs to be set up.  Legacy devices need
3140              * to calculate used and avail ring addresses based on the desc
3141              * address.
3142              */
3143             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3144                 virtio_init_region_cache(vdev, i);
3145             } else {
3146                 virtio_queue_update_rings(vdev, i);
3147             }
3148 
3149             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3150                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3151                 vdev->vq[i].shadow_avail_wrap_counter =
3152                                         vdev->vq[i].last_avail_wrap_counter;
3153                 continue;
3154             }
3155 
3156             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3157             /* Check it isn't doing strange things with descriptor numbers. */
3158             if (nheads > vdev->vq[i].vring.num) {
3159                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3160                              "inconsistent with Host index 0x%x: delta 0x%x",
3161                              i, vdev->vq[i].vring.num,
3162                              vring_avail_idx(&vdev->vq[i]),
3163                              vdev->vq[i].last_avail_idx, nheads);
3164                 vdev->vq[i].used_idx = 0;
3165                 vdev->vq[i].shadow_avail_idx = 0;
3166                 vdev->vq[i].inuse = 0;
3167                 continue;
3168             }
3169             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3170             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3171 
3172             /*
3173              * Some devices migrate VirtQueueElements that have been popped
3174              * from the avail ring but not yet returned to the used ring.
3175              * Since the maximum ring size is < UINT16_MAX, it's safe to use
3176              * subtraction modulo UINT16_MAX + 1.
3177              */
3178             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3179                                 vdev->vq[i].used_idx);
3180             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3181                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3182                              "used_idx 0x%x",
3183                              i, vdev->vq[i].vring.num,
3184                              vdev->vq[i].last_avail_idx,
3185                              vdev->vq[i].used_idx);
3186                 return -1;
3187             }
3188         }
3189     }
3190 
3191     if (vdc->post_load) {
3192         ret = vdc->post_load(vdev);
3193         if (ret) {
3194             return ret;
3195         }
3196     }
3197 
3198     return 0;
3199 }
3200 
3201 void virtio_cleanup(VirtIODevice *vdev)
3202 {
3203     qemu_del_vm_change_state_handler(vdev->vmstate);
3204 }
3205 
3206 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3207 {
3208     VirtIODevice *vdev = opaque;
3209     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3210     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3211     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3212     vdev->vm_running = running;
3213 
3214     if (backend_run) {
3215         virtio_set_status(vdev, vdev->status);
3216     }
3217 
3218     if (k->vmstate_change) {
3219         k->vmstate_change(qbus->parent, backend_run);
3220     }
3221 
3222     if (!backend_run) {
3223         virtio_set_status(vdev, vdev->status);
3224     }
3225 }
3226 
3227 void virtio_instance_init_common(Object *proxy_obj, void *data,
3228                                  size_t vdev_size, const char *vdev_name)
3229 {
3230     DeviceState *vdev = data;
3231 
3232     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3233                                        vdev_size, vdev_name, &error_abort,
3234                                        NULL);
3235     qdev_alias_all_properties(vdev, proxy_obj);
3236 }
3237 
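/*
 * Common initialization shared by all virtio devices: allocates the
 * virtqueue array (and the per-vector queue lists if the transport uses
 * MSI-X vectors), resets the device state and registers the VM state
 * change handler.
 */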
3238 void virtio_init(VirtIODevice *vdev, const char *name,
3239                  uint16_t device_id, size_t config_size)
3240 {
3241     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3242     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3243     int i;
3244     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3245 
3246     if (nvectors) {
3247         vdev->vector_queues =
3248             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3249     }
3250 
3251     vdev->start_on_kick = false;
3252     vdev->started = false;
3253     vdev->device_id = device_id;
3254     vdev->status = 0;
3255     qatomic_set(&vdev->isr, 0);
3256     vdev->queue_sel = 0;
3257     vdev->config_vector = VIRTIO_NO_VECTOR;
3258     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3259     vdev->vm_running = runstate_is_running();
3260     vdev->broken = false;
3261     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3262         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3263         vdev->vq[i].vdev = vdev;
3264         vdev->vq[i].queue_index = i;
3265         vdev->vq[i].host_notifier_enabled = false;
3266     }
3267 
3268     vdev->name = name;
3269     vdev->config_len = config_size;
3270     if (vdev->config_len) {
3271         vdev->config = g_malloc0(config_size);
3272     } else {
3273         vdev->config = NULL;
3274     }
3275     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3276             virtio_vmstate_change, vdev);
3277     vdev->device_endian = virtio_default_endian();
3278     vdev->use_guest_notifier_mask = true;
3279 }
3280 
3281 /*
3282  * Only devices that have already been around prior to defining the virtio
3283  * standard support legacy mode; this includes devices not specified in the
3284  * standard. All newer devices conform to the virtio standard only.
3285  */
3286 bool virtio_legacy_allowed(VirtIODevice *vdev)
3287 {
3288     switch (vdev->device_id) {
3289     case VIRTIO_ID_NET:
3290     case VIRTIO_ID_BLOCK:
3291     case VIRTIO_ID_CONSOLE:
3292     case VIRTIO_ID_RNG:
3293     case VIRTIO_ID_BALLOON:
3294     case VIRTIO_ID_RPMSG:
3295     case VIRTIO_ID_SCSI:
3296     case VIRTIO_ID_9P:
3297     case VIRTIO_ID_RPROC_SERIAL:
3298     case VIRTIO_ID_CAIF:
3299         return true;
3300     default:
3301         return false;
3302     }
3303 }
3304 
3305 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3306 {
3307     return vdev->disable_legacy_check;
3308 }
3309 
3310 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3311 {
3312     return vdev->vq[n].vring.desc;
3313 }
3314 
3315 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3316 {
3317     return virtio_queue_get_desc_addr(vdev, n) != 0;
3318 }
3319 
3320 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3321 {
3322     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3323     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3324 
3325     if (k->queue_enabled) {
3326         return k->queue_enabled(qbus->parent, n);
3327     }
3328     return virtio_queue_enabled_legacy(vdev, n);
3329 }
3330 
3331 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3332 {
3333     return vdev->vq[n].vring.avail;
3334 }
3335 
3336 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3337 {
3338     return vdev->vq[n].vring.used;
3339 }
3340 
3341 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3342 {
3343     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3344 }
3345 
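/*
 * For packed rings the driver and device event suppression structures take
 * the place of the split avail/used rings, so both sizes below are just a
 * VRingPackedDescEvent.  For split rings the size follows the ring layout,
 * plus two bytes for the used_event/avail_event field when
 * VIRTIO_RING_F_EVENT_IDX was negotiated.
 */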
3346 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3347 {
3348     int s;
3349 
3350     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3351         return sizeof(struct VRingPackedDescEvent);
3352     }
3353 
3354     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3355     return offsetof(VRingAvail, ring) +
3356         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3357 }
3358 
3359 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3360 {
3361     int s;
3362 
3363     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3364         return sizeof(struct VRingPackedDescEvent);
3365     }
3366 
3367     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3368     return offsetof(VRingUsed, ring) +
3369         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3370 }
3371 
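/*
 * Encode the progress of a packed virtqueue in a single 32-bit value:
 * bits 0-14 hold last_avail_idx, bit 15 its wrap counter, bits 16-30 hold
 * used_idx and bit 31 the used wrap counter.  The split variant below only
 * needs last_avail_idx itself.
 */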
3372 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3373                                                            int n)
3374 {
3375     unsigned int avail, used;
3376 
3377     avail = vdev->vq[n].last_avail_idx;
3378     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3379 
3380     used = vdev->vq[n].used_idx;
3381     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3382 
3383     return avail | used << 16;
3384 }
3385 
3386 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3387                                                       int n)
3388 {
3389     return vdev->vq[n].last_avail_idx;
3390 }
3391 
3392 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3393 {
3394     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3395         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3396     } else {
3397         return virtio_queue_split_get_last_avail_idx(vdev, n);
3398     }
3399 }
3400 
3401 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3402                                                    int n, unsigned int idx)
3403 {
3404     struct VirtQueue *vq = &vdev->vq[n];
3405 
3406     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3407     vq->last_avail_wrap_counter =
3408         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3409     idx >>= 16;
3410     vq->used_idx = idx & 0x7fff; /* bit 15 holds the wrap counter */
3411     vq->used_wrap_counter = !!(idx & 0x8000);
3412 }
3413 
3414 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3415                                                   int n, unsigned int idx)
3416 {
3417     vdev->vq[n].last_avail_idx = idx;
3418     vdev->vq[n].shadow_avail_idx = idx;
3419 }
3420 
3421 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3422                                      unsigned int idx)
3423 {
3424     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3425         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3426     } else {
3427         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3428     }
3429 }
3430 
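     /*
      * Restoring last_avail_idx from guest memory only makes sense for split
      * rings, where the used ring's idx field can serve as a reference (it
      * equals last_avail_idx once all in-flight requests have completed).
      * Packed rings keep no such index in shared memory, so the packed
      * variant is a no-op.
      */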
3431 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3432                                                        int n)
3433 {
3434     /* We don't have a reference like avail idx in shared memory */
3435     return;
3436 }
3437 
3438 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3439                                                       int n)
3440 {
3441     RCU_READ_LOCK_GUARD();
3442     if (vdev->vq[n].vring.desc) {
3443         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3444         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3445     }
3446 }
3447 
3448 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3449 {
3450     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3451         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3452     } else {
3453         virtio_queue_split_restore_last_avail_idx(vdev, n);
3454     }
3455 }
3456 
3457 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3458 {
3459     /* used idx was updated through set_last_avail_idx() */
3460     return;
3461 }
3462 
3463 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3464 {
3465     RCU_READ_LOCK_GUARD();
3466     if (vdev->vq[n].vring.desc) {
3467         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3468     }
3469 }
3470 
3471 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3472 {
3473     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3474         return virtio_queue_packed_update_used_idx(vdev, n);
3475     } else {
3476         return virtio_queue_split_update_used_idx(vdev, n);
3477     }
3478 }
3479 
3480 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3481 {
3482     vdev->vq[n].signalled_used_valid = false;
3483 }
3484 
3485 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3486 {
3487     return vdev->vq + n;
3488 }
3489 
3490 uint16_t virtio_get_queue_index(VirtQueue *vq)
3491 {
3492     return vq->queue_index;
3493 }
3494 
3495 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3496 {
3497     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3498     if (event_notifier_test_and_clear(n)) {
3499         virtio_irq(vq);
3500     }
3501 }
3502 
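     /*
      * When with_irqfd is true, guest interrupts are expected to be injected
      * directly from the kernel (e.g. via a KVM irqfd bound to the notifier),
      * so no userspace read handler is installed.  Otherwise the handler
      * above turns notifier events into virtio_irq() calls.
      */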
3503 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3504                                                 bool with_irqfd)
3505 {
3506     if (assign && !with_irqfd) {
3507         event_notifier_set_handler(&vq->guest_notifier,
3508                                    virtio_queue_guest_notifier_read);
3509     } else {
3510         event_notifier_set_handler(&vq->guest_notifier, NULL);
3511     }
3512     if (!assign) {
3513         /* Test and clear notifier before closing it,
3514          * in case poll callback didn't have time to run. */
3515         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3516     }
3517 }
3518 
3519 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3520 {
3521     return &vq->guest_notifier;
3522 }
3523 
3524 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3525 {
3526     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3527     if (event_notifier_test_and_clear(n)) {
3528         virtio_queue_notify_aio_vq(vq);
3529     }
3530 }
3531 
3532 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3533 {
3534     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3535 
3536     virtio_queue_set_notification(vq, 0);
3537 }
3538 
3539 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3540 {
3541     EventNotifier *n = opaque;
3542     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3543 
3544     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3545         return false;
3546     }
3547 
3548     return virtio_queue_notify_aio_vq(vq);
3549 }
3550 
3551 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3552 {
3553     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3554 
3555     /* Caller polls once more after this to catch requests that race with us */
3556     virtio_queue_set_notification(vq, 1);
3557 }
3558 
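     /*
      * Attach or detach the queue's host notifier to an AioContext (used for
      * IOThread processing).  While attached, the poll_begin/poll_end
      * callbacks disable and re-enable guest->host notifications around
      * busy-poll periods, since kicks are unnecessary while we are polling.
      * On detach the notifier is drained once in case a kick raced with the
      * removal.
      */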
3559 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3560                                                 VirtIOHandleAIOOutput handle_output)
3561 {
3562     if (handle_output) {
3563         vq->handle_aio_output = handle_output;
3564         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3565                                virtio_queue_host_notifier_aio_read,
3566                                virtio_queue_host_notifier_aio_poll);
3567         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3568                                     virtio_queue_host_notifier_aio_poll_begin,
3569                                     virtio_queue_host_notifier_aio_poll_end);
3570     } else {
3571         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3572         /* Test and clear notifier after disabling event,
3573          * in case poll callback didn't have time to run. */
3574         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3575         vq->handle_aio_output = NULL;
3576     }
3577 }
3578 
3579 void virtio_queue_host_notifier_read(EventNotifier *n)
3580 {
3581     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3582     if (event_notifier_test_and_clear(n)) {
3583         virtio_queue_notify_vq(vq);
3584     }
3585 }
3586 
3587 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3588 {
3589     return &vq->host_notifier;
3590 }
3591 
3592 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3593 {
3594     vq->host_notifier_enabled = enabled;
3595 }
3596 
3597 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3598                                       MemoryRegion *mr, bool assign)
3599 {
3600     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3601     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3602 
3603     if (k->set_host_notifier_mr) {
3604         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3605     }
3606 
3607     return -1;
3608 }
3609 
3610 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3611 {
3612     g_free(vdev->bus_name);
3613     vdev->bus_name = g_strdup(bus_name);
3614 }
3615 
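     /*
      * virtio_error() reports a device error, marks the device broken and,
      * for VIRTIO 1.0 devices, raises VIRTIO_CONFIG_S_NEEDS_RESET via a
      * config interrupt.  A sketch of typical use from a (hypothetical)
      * device request handler:
      *
      *     if (elem->out_num < 1) {
      *         virtio_error(vdev, "%s: missing request header", __func__);
      *         virtqueue_detach_element(vq, elem, 0);
      *         g_free(elem);
      *         return;
      *     }
      */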
3616 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3617 {
3618     va_list ap;
3619 
3620     va_start(ap, fmt);
3621     error_vreport(fmt, ap);
3622     va_end(ap);
3623 
3624     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3625         vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
3626         virtio_notify_config(vdev);
3627     }
3628 
3629     vdev->broken = true;
3630 }
3631 
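     /*
      * Memory listener commit hook: after the guest memory map changes,
      * rebuild the descriptor/avail/used region caches for every in-use
      * queue.  The scan stops at the first queue with vring.num == 0, which
      * is expected to mark the end of the allocated queues.
      */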
3632 static void virtio_memory_listener_commit(MemoryListener *listener)
3633 {
3634     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3635     int i;
3636 
3637     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3638         if (vdev->vq[i].vring.num == 0) {
3639             break;
3640         }
3641         virtio_init_region_cache(vdev, i);
3642     }
3643 }
3644 
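     /*
      * Realize in three steps: run the device-specific realize hook, plug the
      * device into its transport bus, and finally register a memory listener
      * so the vring region caches track guest memory map changes.  If the
      * plug step fails, the device-specific state is unrealized again.
      */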
3645 static void virtio_device_realize(DeviceState *dev, Error **errp)
3646 {
3647     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3648     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3649     Error *err = NULL;
3650 
3651     /* Devices should either use vmsd or the load/save methods */
3652     assert(!vdc->vmsd || !vdc->load);
3653 
3654     if (vdc->realize != NULL) {
3655         vdc->realize(dev, &err);
3656         if (err != NULL) {
3657             error_propagate(errp, err);
3658             return;
3659         }
3660     }
3661 
3662     virtio_bus_device_plugged(vdev, &err);
3663     if (err != NULL) {
3664         error_propagate(errp, err);
3665         vdc->unrealize(dev);
3666         return;
3667     }
3668 
3669     vdev->listener.commit = virtio_memory_listener_commit;
3670     vdev->listener.name = "virtio";
3671     memory_listener_register(&vdev->listener, vdev->dma_as);
3672 }
3673 
3674 static void virtio_device_unrealize(DeviceState *dev)
3675 {
3676     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3677     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3678 
3679     memory_listener_unregister(&vdev->listener);
3680     virtio_bus_device_unplugged(vdev);
3681 
3682     if (vdc->unrealize != NULL) {
3683         vdc->unrealize(dev);
3684     }
3685 
3686     g_free(vdev->bus_name);
3687     vdev->bus_name = NULL;
3688 }
3689 
3690 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3691 {
3692     int i;
3693     if (!vdev->vq) {
3694         return;
3695     }
3696 
3697     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3698         if (vdev->vq[i].vring.num == 0) {
3699             break;
3700         }
3701         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3702     }
3703     g_free(vdev->vq);
3704 }
3705 
3706 static void virtio_device_instance_finalize(Object *obj)
3707 {
3708     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3709 
3710     virtio_device_free_virtqueues(vdev);
3711 
3712     g_free(vdev->config);
3713     g_free(vdev->vector_queues);
3714 }
3715 
3716 static Property virtio_properties[] = {
3717     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3718     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3719     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3720     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3721                      disable_legacy_check, false),
3722     DEFINE_PROP_END_OF_LIST(),
3723 };
3724 
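     /*
      * Wire up ioeventfd-based queue processing: assign a host notifier to
      * every in-use queue, install the read handler, and kick each queue so
      * requests already sitting in the vring get processed.  On failure the
      * notifiers assigned so far are unwound in reverse order and cleaned up
      * after the memory transaction has been committed.
      */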
3725 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3726 {
3727     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3728     int i, n, r, err;
3729 
3730     /*
3731      * Batch all the host notifiers in a single transaction to avoid
3732      * quadratic time complexity in address_space_update_ioeventfds().
3733      */
3734     memory_region_transaction_begin();
3735     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3736         VirtQueue *vq = &vdev->vq[n];
3737         if (!virtio_queue_get_num(vdev, n)) {
3738             continue;
3739         }
3740         r = virtio_bus_set_host_notifier(qbus, n, true);
3741         if (r < 0) {
3742             err = r;
3743             goto assign_error;
3744         }
3745         event_notifier_set_handler(&vq->host_notifier,
3746                                    virtio_queue_host_notifier_read);
3747     }
3748 
3749     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3750         /* Kick right away to begin processing requests already in vring */
3751         VirtQueue *vq = &vdev->vq[n];
3752         if (!vq->vring.num) {
3753             continue;
3754         }
3755         event_notifier_set(&vq->host_notifier);
3756     }
3757     memory_region_transaction_commit();
3758     return 0;
3759 
3760 assign_error:
3761     i = n; /* save n for a second iteration after transaction is committed. */
3762     while (--n >= 0) {
3763         VirtQueue *vq = &vdev->vq[n];
3764         if (!virtio_queue_get_num(vdev, n)) {
3765             continue;
3766         }
3767 
3768         event_notifier_set_handler(&vq->host_notifier, NULL);
3769         r = virtio_bus_set_host_notifier(qbus, n, false);
3770         assert(r >= 0);
3771     }
3772     /*
3773      * The transaction expects the ioeventfds to be open when it
3774      * commits. Do it now, before the cleanup loop.
3775      */
3776     memory_region_transaction_commit();
3777 
3778     while (--i >= 0) {
3779         if (!virtio_queue_get_num(vdev, i)) {
3780             continue;
3781         }
3782         virtio_bus_cleanup_host_notifier(qbus, i);
3783     }
3784     return err;
3785 }
3786 
3787 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3788 {
3789     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3790     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3791 
3792     return virtio_bus_start_ioeventfd(vbus);
3793 }
3794 
3795 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3796 {
3797     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3798     int n, r;
3799 
3800     /*
3801      * Batch all the host notifiers in a single transaction to avoid
3802      * quadratic time complexity in address_space_update_ioeventfds().
3803      */
3804     memory_region_transaction_begin();
3805     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3806         VirtQueue *vq = &vdev->vq[n];
3807 
3808         if (!virtio_queue_get_num(vdev, n)) {
3809             continue;
3810         }
3811         event_notifier_set_handler(&vq->host_notifier, NULL);
3812         r = virtio_bus_set_host_notifier(qbus, n, false);
3813         assert(r >= 0);
3814     }
3815     /*
3816      * The transaction expects the ioeventfds to be open when it
3817      * commits. Do it now, before the cleanup loop.
3818      */
3819     memory_region_transaction_commit();
3820 
3821     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3822         if (!virtio_queue_get_num(vdev, n)) {
3823             continue;
3824         }
3825         virtio_bus_cleanup_host_notifier(qbus, n);
3826     }
3827 }
3828 
3829 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3830 {
3831     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3832     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3833 
3834     return virtio_bus_grab_ioeventfd(vbus);
3835 }
3836 
3837 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3838 {
3839     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3840     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3841 
3842     virtio_bus_release_ioeventfd(vbus);
3843 }
3844 
3845 static void virtio_device_class_init(ObjectClass *klass, void *data)
3846 {
3847     /* Set the default value here. */
3848     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3849     DeviceClass *dc = DEVICE_CLASS(klass);
3850 
3851     dc->realize = virtio_device_realize;
3852     dc->unrealize = virtio_device_unrealize;
3853     dc->bus_type = TYPE_VIRTIO_BUS;
3854     device_class_set_props(dc, virtio_properties);
3855     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3856     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3857 
3858     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3859 }
3860 
3861 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3862 {
3863     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3864     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3865 
3866     return virtio_bus_ioeventfd_enabled(vbus);
3867 }
3868 
3869 static const TypeInfo virtio_device_info = {
3870     .name = TYPE_VIRTIO_DEVICE,
3871     .parent = TYPE_DEVICE,
3872     .instance_size = sizeof(VirtIODevice),
3873     .class_init = virtio_device_class_init,
3874     .instance_finalize = virtio_device_instance_finalize,
3875     .abstract = true,
3876     .class_size = sizeof(VirtioDeviceClass),
3877 };
3878 
3879 static void virtio_register_types(void)
3880 {
3881     type_register_static(&virtio_device_info);
3882 }
3883 
3884 type_init(virtio_register_types)
3885