xref: /openbmc/qemu/hw/virtio/virtio.c (revision 86044b24)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "exec/address-spaces.h"
19 #include "qemu/error-report.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 
31 /*
32  * The alignment to use between consumer and producer parts of vring.
33  * x86 page size. This is the default, used by transports like PCI
34  * which don't provide a means for the guest to tell the host the alignment.
35  */
36 #define VIRTIO_PCI_VRING_ALIGN         4096
37 
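/*
 * Rough sketch (for orientation only) of the split-ring layout this
 * alignment governs, as computed by virtio_queue_update_rings() below,
 * for a queue of N entries:
 *
 *   desc:  N * sizeof(VRingDesc), 16 bytes per descriptor
 *   avail: immediately after the descriptor table
 *   used:  at the next vring.align boundary after the avail ring
 */
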
38 typedef struct VRingDesc
39 {
40     uint64_t addr;
41     uint32_t len;
42     uint16_t flags;
43     uint16_t next;
44 } VRingDesc;
45 
46 typedef struct VRingPackedDesc {
47     uint64_t addr;
48     uint32_t len;
49     uint16_t id;
50     uint16_t flags;
51 } VRingPackedDesc;
52 
53 typedef struct VRingAvail
54 {
55     uint16_t flags;
56     uint16_t idx;
57     uint16_t ring[0];
58 } VRingAvail;
59 
60 typedef struct VRingUsedElem
61 {
62     uint32_t id;
63     uint32_t len;
64 } VRingUsedElem;
65 
66 typedef struct VRingUsed
67 {
68     uint16_t flags;
69     uint16_t idx;
70     VRingUsedElem ring[0];
71 } VRingUsed;
72 
73 typedef struct VRingMemoryRegionCaches {
74     struct rcu_head rcu;
75     MemoryRegionCache desc;
76     MemoryRegionCache avail;
77     MemoryRegionCache used;
78 } VRingMemoryRegionCaches;
79 
80 typedef struct VRing
81 {
82     unsigned int num;
83     unsigned int num_default;
84     unsigned int align;
85     hwaddr desc;
86     hwaddr avail;
87     hwaddr used;
88     VRingMemoryRegionCaches *caches;
89 } VRing;
90 
91 typedef struct VRingPackedDescEvent {
92     uint16_t off_wrap;
93     uint16_t flags;
94 } VRingPackedDescEvent;
95 
96 struct VirtQueue
97 {
98     VRing vring;
99     VirtQueueElement *used_elems;
100 
101     /* Next head to pop */
102     uint16_t last_avail_idx;
103     bool last_avail_wrap_counter;
104 
105     /* Last avail_idx read from VQ. */
106     uint16_t shadow_avail_idx;
107     bool shadow_avail_wrap_counter;
108 
109     uint16_t used_idx;
110     bool used_wrap_counter;
111 
112     /* Last used index value we have signalled on */
113     uint16_t signalled_used;
114 
115     /* Last used index value we have signalled on */
116     bool signalled_used_valid;
117 
118     /* Notification enabled? */
119     bool notification;
120 
121     uint16_t queue_index;
122 
123     unsigned int inuse;
124 
125     uint16_t vector;
126     VirtIOHandleOutput handle_output;
127     VirtIOHandleAIOOutput handle_aio_output;
128     VirtIODevice *vdev;
129     EventNotifier guest_notifier;
130     EventNotifier host_notifier;
131     QLIST_ENTRY(VirtQueue) node;
132 };
133 
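/*
 * Informal summary of the index bookkeeping above: last_avail_idx is the
 * next entry the device will pop, shadow_avail_idx caches the guest's
 * avail->idx as last read from guest memory, and used_idx mirrors what the
 * device last wrote to used->idx.  The *_wrap_counter fields only matter
 * for packed rings, where indices wrap at vring.num instead of running
 * freely modulo 2^16.
 */
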
134 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
135 {
136     if (!caches) {
137         return;
138     }
139 
140     address_space_cache_destroy(&caches->desc);
141     address_space_cache_destroy(&caches->avail);
142     address_space_cache_destroy(&caches->used);
143     g_free(caches);
144 }
145 
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148     VRingMemoryRegionCaches *caches;
149 
150     caches = atomic_read(&vq->vring.caches);
151     atomic_rcu_set(&vq->vring.caches, NULL);
152     if (caches) {
153         call_rcu(caches, virtio_free_region_cache, rcu);
154     }
155 }
156 
157 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
158 {
159     VirtQueue *vq = &vdev->vq[n];
160     VRingMemoryRegionCaches *old = vq->vring.caches;
161     VRingMemoryRegionCaches *new = NULL;
162     hwaddr addr, size;
163     int64_t len;
164     bool packed;
165 
166 
167     addr = vq->vring.desc;
168     if (!addr) {
169         goto out_no_cache;
170     }
171     new = g_new0(VRingMemoryRegionCaches, 1);
172     size = virtio_queue_get_desc_size(vdev, n);
173     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
174                                    true : false;
175     len = address_space_cache_init(&new->desc, vdev->dma_as,
176                                    addr, size, packed);
177     if (len < size) {
178         virtio_error(vdev, "Cannot map desc");
179         goto err_desc;
180     }
181 
182     size = virtio_queue_get_used_size(vdev, n);
183     len = address_space_cache_init(&new->used, vdev->dma_as,
184                                    vq->vring.used, size, true);
185     if (len < size) {
186         virtio_error(vdev, "Cannot map used");
187         goto err_used;
188     }
189 
190     size = virtio_queue_get_avail_size(vdev, n);
191     len = address_space_cache_init(&new->avail, vdev->dma_as,
192                                    vq->vring.avail, size, false);
193     if (len < size) {
194         virtio_error(vdev, "Cannot map avail");
195         goto err_avail;
196     }
197 
198     atomic_rcu_set(&vq->vring.caches, new);
199     if (old) {
200         call_rcu(old, virtio_free_region_cache, rcu);
201     }
202     return;
203 
204 err_avail:
205     address_space_cache_destroy(&new->avail);
206 err_used:
207     address_space_cache_destroy(&new->used);
208 err_desc:
209     address_space_cache_destroy(&new->desc);
210 out_no_cache:
211     g_free(new);
212     virtio_virtqueue_reset_region_cache(vq);
213 }
214 
215 /* virt queue functions */
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218     VRing *vring = &vdev->vq[n].vring;
219 
220     if (!vring->num || !vring->desc || !vring->align) {
221         /* not yet setup -> nothing to do */
222         return;
223     }
224     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225     vring->used = vring_align(vring->avail +
226                               offsetof(VRingAvail, ring[vring->num]),
227                               vring->align);
228     virtio_init_region_cache(vdev, n);
229 }
230 
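/*
 * Worked example (illustrative numbers): with num = 256 and align = 4096,
 * the descriptor table takes 256 * 16 = 4096 bytes, so avail = desc + 4096;
 * the avail ring needs 4 + 2 * 256 = 516 bytes, and used therefore lands at
 * vring_align(desc + 4612, 4096) = desc + 8192.
 */
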
231 /* Called within rcu_read_lock().  */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233                                   MemoryRegionCache *cache, int i)
234 {
235     address_space_read_cached(cache, i * sizeof(VRingDesc),
236                               desc, sizeof(VRingDesc));
237     virtio_tswap64s(vdev, &desc->addr);
238     virtio_tswap32s(vdev, &desc->len);
239     virtio_tswap16s(vdev, &desc->flags);
240     virtio_tswap16s(vdev, &desc->next);
241 }
242 
243 /* Called within rcu_read_lock().  */
244 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
245 {
246     VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
247     assert(caches != NULL);
248     return caches;
249 }
250 /* Called within rcu_read_lock().  */
251 static inline uint16_t vring_avail_flags(VirtQueue *vq)
252 {
253     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
254     hwaddr pa = offsetof(VRingAvail, flags);
255     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
256 }
257 
258 /* Called within rcu_read_lock().  */
259 static inline uint16_t vring_avail_idx(VirtQueue *vq)
260 {
261     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
262     hwaddr pa = offsetof(VRingAvail, idx);
263     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
264     return vq->shadow_avail_idx;
265 }
266 
267 /* Called within rcu_read_lock().  */
268 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
269 {
270     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
271     hwaddr pa = offsetof(VRingAvail, ring[i]);
272     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
273 }
274 
275 /* Called within rcu_read_lock().  */
276 static inline uint16_t vring_get_used_event(VirtQueue *vq)
277 {
278     return vring_avail_ring(vq, vq->vring.num);
279 }
280 
281 /* Called within rcu_read_lock().  */
282 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
283                                     int i)
284 {
285     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
286     hwaddr pa = offsetof(VRingUsed, ring[i]);
287     virtio_tswap32s(vq->vdev, &uelem->id);
288     virtio_tswap32s(vq->vdev, &uelem->len);
289     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
290     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
291 }
292 
293 /* Called within rcu_read_lock().  */
294 static uint16_t vring_used_idx(VirtQueue *vq)
295 {
296     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
297     hwaddr pa = offsetof(VRingUsed, idx);
298     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
299 }
300 
301 /* Called within rcu_read_lock().  */
302 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
303 {
304     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
305     hwaddr pa = offsetof(VRingUsed, idx);
306     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
307     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
308     vq->used_idx = val;
309 }
310 
311 /* Called within rcu_read_lock().  */
312 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
313 {
314     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
315     VirtIODevice *vdev = vq->vdev;
316     hwaddr pa = offsetof(VRingUsed, flags);
317     uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
318 
319     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
320     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
321 }
322 
323 /* Called within rcu_read_lock().  */
324 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
325 {
326     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
327     VirtIODevice *vdev = vq->vdev;
328     hwaddr pa = offsetof(VRingUsed, flags);
329     uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
330 
331     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
332     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
333 }
334 
335 /* Called within rcu_read_lock().  */
336 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
337 {
338     VRingMemoryRegionCaches *caches;
339     hwaddr pa;
340     if (!vq->notification) {
341         return;
342     }
343 
344     caches = vring_get_region_caches(vq);
345     pa = offsetof(VRingUsed, ring[vq->vring.num]);
346     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
347     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
348 }
349 
350 void virtio_queue_set_notification(VirtQueue *vq, int enable)
351 {
352     vq->notification = enable;
353 
354     if (!vq->vring.desc) {
355         return;
356     }
357 
358     rcu_read_lock();
359     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
360         vring_set_avail_event(vq, vring_avail_idx(vq));
361     } else if (enable) {
362         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
363     } else {
364         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
365     }
366     if (enable) {
367         /* Expose avail event/used flags before caller checks the avail idx. */
368         smp_mb();
369     }
370     rcu_read_unlock();
371 }
372 
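/*
 * Note that two suppression schemes are handled above: with
 * VIRTIO_RING_F_EVENT_IDX the device publishes, via vring_set_avail_event(),
 * the avail index at which it next wants to be kicked; without it, the
 * device simply sets or clears VRING_USED_F_NO_NOTIFY in used->flags.
 */
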
373 int virtio_queue_ready(VirtQueue *vq)
374 {
375     return vq->vring.avail != 0;
376 }
377 
378 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
379                                          uint16_t *flags,
380                                          MemoryRegionCache *cache,
381                                          int i)
382 {
383     address_space_read_cached(cache,
384                               i * sizeof(VRingPackedDesc) +
385                               offsetof(VRingPackedDesc, flags),
386                               flags, sizeof(*flags));
387     virtio_tswap16s(vdev, flags);
388 }
389 
390 static void vring_packed_desc_read(VirtIODevice *vdev,
391                                    VRingPackedDesc *desc,
392                                    MemoryRegionCache *cache,
393                                    int i, bool strict_order)
394 {
395     hwaddr off = i * sizeof(VRingPackedDesc);
396 
397     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
398 
399     if (strict_order) {
400         /* Make sure flags are read before the rest of the fields. */
401         smp_rmb();
402     }
403 
404     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
405                               &desc->addr, sizeof(desc->addr));
406     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
407                               &desc->id, sizeof(desc->id));
408     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
409                               &desc->len, sizeof(desc->len));
410     virtio_tswap64s(vdev, &desc->addr);
411     virtio_tswap16s(vdev, &desc->id);
412     virtio_tswap32s(vdev, &desc->len);
413 }
414 
415 static void vring_packed_desc_write_data(VirtIODevice *vdev,
416                                          VRingPackedDesc *desc,
417                                          MemoryRegionCache *cache,
418                                          int i)
419 {
420     hwaddr off_id = i * sizeof(VRingPackedDesc) +
421                     offsetof(VRingPackedDesc, id);
422     hwaddr off_len = i * sizeof(VRingPackedDesc) +
423                     offsetof(VRingPackedDesc, len);
424 
425     virtio_tswap32s(vdev, &desc->len);
426     virtio_tswap16s(vdev, &desc->id);
427     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
428     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
429     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
430     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
431 }
432 
433 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
434                                           VRingPackedDesc *desc,
435                                           MemoryRegionCache *cache,
436                                           int i)
437 {
438     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
439 
440     virtio_tswap16s(vdev, &desc->flags);
441     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
442     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
443 }
444 
445 static void vring_packed_desc_write(VirtIODevice *vdev,
446                                     VRingPackedDesc *desc,
447                                     MemoryRegionCache *cache,
448                                     int i, bool strict_order)
449 {
450     vring_packed_desc_write_data(vdev, desc, cache, i);
451     if (strict_order) {
452         /* Make sure data is written before flags. */
453         smp_wmb();
454     }
455     vring_packed_desc_write_flags(vdev, desc, cache, i);
456 }
457 
458 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
459 {
460     bool avail, used;
461 
462     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
463     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
464     return (avail != used) && (avail == wrap_counter);
465 }
466 
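/*
 * Put differently (packed-ring convention): a descriptor belongs to the
 * device when its AVAIL and USED bits differ and AVAIL matches the current
 * wrap counter.  For example, on the first pass (wrap counter 1) available
 * descriptors have AVAIL=1/USED=0; after the ring wraps they have
 * AVAIL=0/USED=1.
 */
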
467 /* Fetch avail_idx from VQ memory only when we really need to know if the
468  * guest has added some buffers.
469  * Called within rcu_read_lock().  */
470 static int virtio_queue_empty_rcu(VirtQueue *vq)
471 {
472     if (unlikely(vq->vdev->broken)) {
473         return 1;
474     }
475 
476     if (unlikely(!vq->vring.avail)) {
477         return 1;
478     }
479 
480     if (vq->shadow_avail_idx != vq->last_avail_idx) {
481         return 0;
482     }
483 
484     return vring_avail_idx(vq) == vq->last_avail_idx;
485 }
486 
487 static int virtio_queue_split_empty(VirtQueue *vq)
488 {
489     bool empty;
490 
491     if (unlikely(vq->vdev->broken)) {
492         return 1;
493     }
494 
495     if (unlikely(!vq->vring.avail)) {
496         return 1;
497     }
498 
499     if (vq->shadow_avail_idx != vq->last_avail_idx) {
500         return 0;
501     }
502 
503     rcu_read_lock();
504     empty = vring_avail_idx(vq) == vq->last_avail_idx;
505     rcu_read_unlock();
506     return empty;
507 }
508 
509 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
510 {
511     struct VRingPackedDesc desc;
512     VRingMemoryRegionCaches *cache;
513 
514     if (unlikely(!vq->vring.desc)) {
515         return 1;
516     }
517 
518     cache = vring_get_region_caches(vq);
519     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
520                                  vq->last_avail_idx);
521 
522     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
523 }
524 
525 static int virtio_queue_packed_empty(VirtQueue *vq)
526 {
527     bool empty;
528 
529     rcu_read_lock();
530     empty = virtio_queue_packed_empty_rcu(vq);
531     rcu_read_unlock();
532     return empty;
533 }
534 
535 int virtio_queue_empty(VirtQueue *vq)
536 {
537     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
538         return virtio_queue_packed_empty(vq);
539     } else {
540         return virtio_queue_split_empty(vq);
541     }
542 }
543 
544 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
545                                unsigned int len)
546 {
547     AddressSpace *dma_as = vq->vdev->dma_as;
548     unsigned int offset;
549     int i;
550 
551     offset = 0;
552     for (i = 0; i < elem->in_num; i++) {
553         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
554 
555         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
556                          elem->in_sg[i].iov_len,
557                          DMA_DIRECTION_FROM_DEVICE, size);
558 
559         offset += size;
560     }
561 
562     for (i = 0; i < elem->out_num; i++)
563         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
564                          elem->out_sg[i].iov_len,
565                          DMA_DIRECTION_TO_DEVICE,
566                          elem->out_sg[i].iov_len);
567 }
568 
569 /* virtqueue_detach_element:
570  * @vq: The #VirtQueue
571  * @elem: The #VirtQueueElement
572  * @len: number of bytes written
573  *
574  * Detach the element from the virtqueue.  This function is suitable for device
575  * reset or other situations where a #VirtQueueElement is simply freed and will
576  * not be pushed or discarded.
577  */
578 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
579                               unsigned int len)
580 {
581     vq->inuse -= elem->ndescs;
582     virtqueue_unmap_sg(vq, elem, len);
583 }
584 
585 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
586 {
587     vq->last_avail_idx -= num;
588 }
589 
590 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
591 {
592     if (vq->last_avail_idx < num) {
593         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
594         vq->last_avail_wrap_counter ^= 1;
595     } else {
596         vq->last_avail_idx -= num;
597     }
598 }
599 
600 /* virtqueue_unpop:
601  * @vq: The #VirtQueue
602  * @elem: The #VirtQueueElement
603  * @len: number of bytes written
604  *
605  * Pretend the most recent element wasn't popped from the virtqueue.  The next
606  * call to virtqueue_pop() will refetch the element.
607  */
608 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
609                      unsigned int len)
610 {
611 
612     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
613         virtqueue_packed_rewind(vq, 1);
614     } else {
615         virtqueue_split_rewind(vq, 1);
616     }
617 
618     virtqueue_detach_element(vq, elem, len);
619 }
620 
621 /* virtqueue_rewind:
622  * @vq: The #VirtQueue
623  * @num: Number of elements to push back
624  *
625  * Pretend that elements weren't popped from the virtqueue.  The next
626  * virtqueue_pop() will refetch the oldest element.
627  *
628  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
629  *
630  * Returns: true on success, false if @num is greater than the number of in use
631  * elements.
632  */
633 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
634 {
635     if (num > vq->inuse) {
636         return false;
637     }
638 
639     vq->inuse -= num;
640     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
641         virtqueue_packed_rewind(vq, num);
642     } else {
643         virtqueue_split_rewind(vq, num);
644     }
645     return true;
646 }
647 
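/*
 * Informal summary of the helpers above: virtqueue_detach_element() unmaps
 * and forgets an element, virtqueue_unpop() additionally rewinds the avail
 * index by one so the element will be popped again, and virtqueue_rewind()
 * rewinds @num elements when the caller no longer holds the
 * VirtQueueElement pointers.
 */
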
648 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
649                     unsigned int len, unsigned int idx)
650 {
651     VRingUsedElem uelem;
652 
653     if (unlikely(!vq->vring.used)) {
654         return;
655     }
656 
657     idx = (idx + vq->used_idx) % vq->vring.num;
658 
659     uelem.id = elem->index;
660     uelem.len = len;
661     vring_used_write(vq, &uelem, idx);
662 }
663 
664 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
665                                   unsigned int len, unsigned int idx)
666 {
667     vq->used_elems[idx].index = elem->index;
668     vq->used_elems[idx].len = len;
669     vq->used_elems[idx].ndescs = elem->ndescs;
670 }
671 
672 static void virtqueue_packed_fill_desc(VirtQueue *vq,
673                                        const VirtQueueElement *elem,
674                                        unsigned int idx,
675                                        bool strict_order)
676 {
677     uint16_t head;
678     VRingMemoryRegionCaches *caches;
679     VRingPackedDesc desc = {
680         .id = elem->index,
681         .len = elem->len,
682     };
683     bool wrap_counter = vq->used_wrap_counter;
684 
685     if (unlikely(!vq->vring.desc)) {
686         return;
687     }
688 
689     head = vq->used_idx + idx;
690     if (head >= vq->vring.num) {
691         head -= vq->vring.num;
692         wrap_counter ^= 1;
693     }
694     if (wrap_counter) {
695         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
696         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
697     } else {
698         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
699         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
700     }
701 
702     caches = vring_get_region_caches(vq);
703     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
704 }
705 
706 /* Called within rcu_read_lock().  */
707 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
708                     unsigned int len, unsigned int idx)
709 {
710     trace_virtqueue_fill(vq, elem, len, idx);
711 
712     virtqueue_unmap_sg(vq, elem, len);
713 
714     if (unlikely(vq->vdev->broken)) {
715         return;
716     }
717 
718     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
719         virtqueue_packed_fill(vq, elem, len, idx);
720     } else {
721         virtqueue_split_fill(vq, elem, len, idx);
722     }
723 }
724 
725 /* Called within rcu_read_lock().  */
726 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
727 {
728     uint16_t old, new;
729 
730     if (unlikely(!vq->vring.used)) {
731         return;
732     }
733 
734     /* Make sure buffer is written before we update index. */
735     smp_wmb();
736     trace_virtqueue_flush(vq, count);
737     old = vq->used_idx;
738     new = old + count;
739     vring_used_idx_set(vq, new);
740     vq->inuse -= count;
741     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
742         vq->signalled_used_valid = false;
743 }
744 
745 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
746 {
747     unsigned int i, ndescs = 0;
748 
749     if (unlikely(!vq->vring.desc)) {
750         return;
751     }
752 
753     for (i = 1; i < count; i++) {
754         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
755         ndescs += vq->used_elems[i].ndescs;
756     }
757     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
758     ndescs += vq->used_elems[0].ndescs;
759 
760     vq->inuse -= ndescs;
761     vq->used_idx += ndescs;
762     if (vq->used_idx >= vq->vring.num) {
763         vq->used_idx -= vq->vring.num;
764         vq->used_wrap_counter ^= 1;
765     }
766 }
767 
768 void virtqueue_flush(VirtQueue *vq, unsigned int count)
769 {
770     if (unlikely(vq->vdev->broken)) {
771         vq->inuse -= count;
772         return;
773     }
774 
775     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
776         virtqueue_packed_flush(vq, count);
777     } else {
778         virtqueue_split_flush(vq, count);
779     }
780 }
781 
782 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
783                     unsigned int len)
784 {
785     rcu_read_lock();
786     virtqueue_fill(vq, elem, len, 0);
787     virtqueue_flush(vq, 1);
788     rcu_read_unlock();
789 }
790 
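/*
 * Typical completion path in a device model, as an illustrative sketch only
 * (error handling and per-device details omitted; virtio_notify() is
 * declared in hw/virtio/virtio.h):
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem) {
 *         ... consume elem->out_sg, write results into elem->in_sg ...
 *         virtqueue_push(vq, elem, bytes_written);
 *         virtio_notify(vdev, vq);
 *         g_free(elem);
 *     }
 */
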
791 /* Called within rcu_read_lock().  */
792 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
793 {
794     uint16_t num_heads = vring_avail_idx(vq) - idx;
795 
796     /* Check it isn't doing very strange things with descriptor numbers. */
797     if (num_heads > vq->vring.num) {
798         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
799                      idx, vq->shadow_avail_idx);
800         return -EINVAL;
801     }
802     /* On success, callers read a descriptor at vq->last_avail_idx.
803      * Make sure descriptor read does not bypass avail index read. */
804     if (num_heads) {
805         smp_rmb();
806     }
807 
808     return num_heads;
809 }
810 
811 /* Called within rcu_read_lock().  */
812 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
813                                unsigned int *head)
814 {
815     /* Grab the next descriptor number they're advertising, and increment
816      * the index we've seen. */
817     *head = vring_avail_ring(vq, idx % vq->vring.num);
818 
819     /* If their number is silly, that's a fatal mistake. */
820     if (*head >= vq->vring.num) {
821         virtio_error(vq->vdev, "Guest says index %u is available", *head);
822         return false;
823     }
824 
825     return true;
826 }
827 
828 enum {
829     VIRTQUEUE_READ_DESC_ERROR = -1,
830     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
831     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
832 };
833 
834 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
835                                           MemoryRegionCache *desc_cache,
836                                           unsigned int max, unsigned int *next)
837 {
838     /* If this descriptor says it doesn't chain, we're done. */
839     if (!(desc->flags & VRING_DESC_F_NEXT)) {
840         return VIRTQUEUE_READ_DESC_DONE;
841     }
842 
843     /* Check they're not leading us off the end of the descriptors. */
844     *next = desc->next;
845     /* Make sure compiler knows to grab that: we don't want it changing! */
846     smp_wmb();
847 
848     if (*next >= max) {
849         virtio_error(vdev, "Desc next is %u", *next);
850         return VIRTQUEUE_READ_DESC_ERROR;
851     }
852 
853     vring_split_desc_read(vdev, desc, desc_cache, *next);
854     return VIRTQUEUE_READ_DESC_MORE;
855 }
856 
857 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
858                             unsigned int *in_bytes, unsigned int *out_bytes,
859                             unsigned max_in_bytes, unsigned max_out_bytes)
860 {
861     VirtIODevice *vdev = vq->vdev;
862     unsigned int max, idx;
863     unsigned int total_bufs, in_total, out_total;
864     VRingMemoryRegionCaches *caches;
865     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
866     int64_t len = 0;
867     int rc;
868 
869     rcu_read_lock();
870     idx = vq->last_avail_idx;
871     total_bufs = in_total = out_total = 0;
872 
873     max = vq->vring.num;
874     caches = vring_get_region_caches(vq);
875     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
876         MemoryRegionCache *desc_cache = &caches->desc;
877         unsigned int num_bufs;
878         VRingDesc desc;
879         unsigned int i;
880 
881         num_bufs = total_bufs;
882 
883         if (!virtqueue_get_head(vq, idx++, &i)) {
884             goto err;
885         }
886 
887         vring_split_desc_read(vdev, &desc, desc_cache, i);
888 
889         if (desc.flags & VRING_DESC_F_INDIRECT) {
890             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
891                 virtio_error(vdev, "Invalid size for indirect buffer table");
892                 goto err;
893             }
894 
895             /* If we've got too many, that implies a descriptor loop. */
896             if (num_bufs >= max) {
897                 virtio_error(vdev, "Looped descriptor");
898                 goto err;
899             }
900 
901             /* loop over the indirect descriptor table */
902             len = address_space_cache_init(&indirect_desc_cache,
903                                            vdev->dma_as,
904                                            desc.addr, desc.len, false);
905             desc_cache = &indirect_desc_cache;
906             if (len < desc.len) {
907                 virtio_error(vdev, "Cannot map indirect buffer");
908                 goto err;
909             }
910 
911             max = desc.len / sizeof(VRingDesc);
912             num_bufs = i = 0;
913             vring_split_desc_read(vdev, &desc, desc_cache, i);
914         }
915 
916         do {
917             /* If we've got too many, that implies a descriptor loop. */
918             if (++num_bufs > max) {
919                 virtio_error(vdev, "Looped descriptor");
920                 goto err;
921             }
922 
923             if (desc.flags & VRING_DESC_F_WRITE) {
924                 in_total += desc.len;
925             } else {
926                 out_total += desc.len;
927             }
928             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
929                 goto done;
930             }
931 
932             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
933         } while (rc == VIRTQUEUE_READ_DESC_MORE);
934 
935         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
936             goto err;
937         }
938 
939         if (desc_cache == &indirect_desc_cache) {
940             address_space_cache_destroy(&indirect_desc_cache);
941             total_bufs++;
942         } else {
943             total_bufs = num_bufs;
944         }
945     }
946 
947     if (rc < 0) {
948         goto err;
949     }
950 
951 done:
952     address_space_cache_destroy(&indirect_desc_cache);
953     if (in_bytes) {
954         *in_bytes = in_total;
955     }
956     if (out_bytes) {
957         *out_bytes = out_total;
958     }
959     rcu_read_unlock();
960     return;
961 
962 err:
963     in_total = out_total = 0;
964     goto done;
965 }
966 
967 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
968                                            VRingPackedDesc *desc,
969                                            MemoryRegionCache
970                                            *desc_cache,
971                                            unsigned int max,
972                                            unsigned int *next,
973                                            bool indirect)
974 {
975     /* If this descriptor says it doesn't chain, we're done. */
976     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
977         return VIRTQUEUE_READ_DESC_DONE;
978     }
979 
980     ++*next;
981     if (*next == max) {
982         if (indirect) {
983             return VIRTQUEUE_READ_DESC_DONE;
984         } else {
985             (*next) -= vq->vring.num;
986         }
987     }
988 
989     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
990     return VIRTQUEUE_READ_DESC_MORE;
991 }
992 
993 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
994                                              unsigned int *in_bytes,
995                                              unsigned int *out_bytes,
996                                              unsigned max_in_bytes,
997                                              unsigned max_out_bytes)
998 {
999     VirtIODevice *vdev = vq->vdev;
1000     unsigned int max, idx;
1001     unsigned int total_bufs, in_total, out_total;
1002     MemoryRegionCache *desc_cache;
1003     VRingMemoryRegionCaches *caches;
1004     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1005     int64_t len = 0;
1006     VRingPackedDesc desc;
1007     bool wrap_counter;
1008 
1009     rcu_read_lock();
1010     idx = vq->last_avail_idx;
1011     wrap_counter = vq->last_avail_wrap_counter;
1012     total_bufs = in_total = out_total = 0;
1013 
1014     max = vq->vring.num;
1015     caches = vring_get_region_caches(vq);
1016 
1017     for (;;) {
1018         unsigned int num_bufs = total_bufs;
1019         unsigned int i = idx;
1020         int rc;
1021 
1022         desc_cache = &caches->desc;
1023         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1024         if (!is_desc_avail(desc.flags, wrap_counter)) {
1025             break;
1026         }
1027 
1028         if (desc.flags & VRING_DESC_F_INDIRECT) {
1029             if (desc.len % sizeof(VRingPackedDesc)) {
1030                 virtio_error(vdev, "Invalid size for indirect buffer table");
1031                 goto err;
1032             }
1033 
1034             /* If we've got too many, that implies a descriptor loop. */
1035             if (num_bufs >= max) {
1036                 virtio_error(vdev, "Looped descriptor");
1037                 goto err;
1038             }
1039 
1040             /* loop over the indirect descriptor table */
1041             len = address_space_cache_init(&indirect_desc_cache,
1042                                            vdev->dma_as,
1043                                            desc.addr, desc.len, false);
1044             desc_cache = &indirect_desc_cache;
1045             if (len < desc.len) {
1046                 virtio_error(vdev, "Cannot map indirect buffer");
1047                 goto err;
1048             }
1049 
1050             max = desc.len / sizeof(VRingPackedDesc);
1051             num_bufs = i = 0;
1052             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1053         }
1054 
1055         do {
1056             /* If we've got too many, that implies a descriptor loop. */
1057             if (++num_bufs > max) {
1058                 virtio_error(vdev, "Looped descriptor");
1059                 goto err;
1060             }
1061 
1062             if (desc.flags & VRING_DESC_F_WRITE) {
1063                 in_total += desc.len;
1064             } else {
1065                 out_total += desc.len;
1066             }
1067             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1068                 goto done;
1069             }
1070 
1071             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1072                                                  &i, desc_cache ==
1073                                                  &indirect_desc_cache);
1074         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1075 
1076         if (desc_cache == &indirect_desc_cache) {
1077             address_space_cache_destroy(&indirect_desc_cache);
1078             total_bufs++;
1079             idx++;
1080         } else {
1081             idx += num_bufs - total_bufs;
1082             total_bufs = num_bufs;
1083         }
1084 
1085         if (idx >= vq->vring.num) {
1086             idx -= vq->vring.num;
1087             wrap_counter ^= 1;
1088         }
1089     }
1090 
1091     /* Record the index and wrap counter for a kick we want */
1092     vq->shadow_avail_idx = idx;
1093     vq->shadow_avail_wrap_counter = wrap_counter;
1094 done:
1095     address_space_cache_destroy(&indirect_desc_cache);
1096     if (in_bytes) {
1097         *in_bytes = in_total;
1098     }
1099     if (out_bytes) {
1100         *out_bytes = out_total;
1101     }
1102     rcu_read_unlock();
1103     return;
1104 
1105 err:
1106     in_total = out_total = 0;
1107     goto done;
1108 }
1109 
1110 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1111                                unsigned int *out_bytes,
1112                                unsigned max_in_bytes, unsigned max_out_bytes)
1113 {
1114     uint16_t desc_size;
1115     VRingMemoryRegionCaches *caches;
1116 
1117     if (unlikely(!vq->vring.desc)) {
1118         goto err;
1119     }
1120 
1121     caches = vring_get_region_caches(vq);
1122     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1123                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1124     if (caches->desc.len < vq->vring.num * desc_size) {
1125         virtio_error(vq->vdev, "Cannot map descriptor ring");
1126         goto err;
1127     }
1128 
1129     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1130         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1131                                          max_in_bytes, max_out_bytes);
1132     } else {
1133         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1134                                         max_in_bytes, max_out_bytes);
1135     }
1136 
1137     return;
1138 err:
1139     if (in_bytes) {
1140         *in_bytes = 0;
1141     }
1142     if (out_bytes) {
1143         *out_bytes = 0;
1144     }
1145 }
1146 
1147 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1148                           unsigned int out_bytes)
1149 {
1150     unsigned int in_total, out_total;
1151 
1152     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1153     return in_bytes <= in_total && out_bytes <= out_total;
1154 }
1155 
1156 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1157                                hwaddr *addr, struct iovec *iov,
1158                                unsigned int max_num_sg, bool is_write,
1159                                hwaddr pa, size_t sz)
1160 {
1161     bool ok = false;
1162     unsigned num_sg = *p_num_sg;
1163     assert(num_sg <= max_num_sg);
1164 
1165     if (!sz) {
1166         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1167         goto out;
1168     }
1169 
1170     while (sz) {
1171         hwaddr len = sz;
1172 
1173         if (num_sg == max_num_sg) {
1174             virtio_error(vdev, "virtio: too many write descriptors in "
1175                                "indirect table");
1176             goto out;
1177         }
1178 
1179         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1180                                               is_write ?
1181                                               DMA_DIRECTION_FROM_DEVICE :
1182                                               DMA_DIRECTION_TO_DEVICE);
1183         if (!iov[num_sg].iov_base) {
1184             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1185             goto out;
1186         }
1187 
1188         iov[num_sg].iov_len = len;
1189         addr[num_sg] = pa;
1190 
1191         sz -= len;
1192         pa += len;
1193         num_sg++;
1194     }
1195     ok = true;
1196 
1197 out:
1198     *p_num_sg = num_sg;
1199     return ok;
1200 }
1201 
1202 /* Only used by error code paths before we have a VirtQueueElement (therefore
1203  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1204  * yet.
1205  */
1206 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1207                                     struct iovec *iov)
1208 {
1209     unsigned int i;
1210 
1211     for (i = 0; i < out_num + in_num; i++) {
1212         int is_write = i >= out_num;
1213 
1214         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1215         iov++;
1216     }
1217 }
1218 
1219 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1220                                 hwaddr *addr, unsigned int num_sg,
1221                                 int is_write)
1222 {
1223     unsigned int i;
1224     hwaddr len;
1225 
1226     for (i = 0; i < num_sg; i++) {
1227         len = sg[i].iov_len;
1228         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1229                                         addr[i], &len, is_write ?
1230                                         DMA_DIRECTION_FROM_DEVICE :
1231                                         DMA_DIRECTION_TO_DEVICE);
1232         if (!sg[i].iov_base) {
1233             error_report("virtio: error trying to map MMIO memory");
1234             exit(1);
1235         }
1236         if (len != sg[i].iov_len) {
1237             error_report("virtio: unexpected memory split");
1238             exit(1);
1239         }
1240     }
1241 }
1242 
1243 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1244 {
1245     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, 1);
1246     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 0);
1247 }
1248 
1249 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1250 {
1251     VirtQueueElement *elem;
1252     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1253     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1254     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1255     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1256     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1257     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1258 
1259     assert(sz >= sizeof(VirtQueueElement));
1260     elem = g_malloc(out_sg_end);
1261     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1262     elem->out_num = out_num;
1263     elem->in_num = in_num;
1264     elem->in_addr = (void *)elem + in_addr_ofs;
1265     elem->out_addr = (void *)elem + out_addr_ofs;
1266     elem->in_sg = (void *)elem + in_sg_ofs;
1267     elem->out_sg = (void *)elem + out_sg_ofs;
1268     return elem;
1269 }
1270 
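/*
 * The allocator above packs everything into a single allocation, roughly:
 *
 *   [ caller struct of size sz | in_addr[in_num] | out_addr[out_num] |
 *     in_sg[in_num] | out_sg[out_num] ]
 *
 * with each array aligned for its element type, so one g_free() in the
 * caller releases the element together with its scatter/gather metadata.
 */
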
1271 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1272 {
1273     unsigned int i, head, max;
1274     VRingMemoryRegionCaches *caches;
1275     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1276     MemoryRegionCache *desc_cache;
1277     int64_t len;
1278     VirtIODevice *vdev = vq->vdev;
1279     VirtQueueElement *elem = NULL;
1280     unsigned out_num, in_num, elem_entries;
1281     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1282     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1283     VRingDesc desc;
1284     int rc;
1285 
1286     rcu_read_lock();
1287     if (virtio_queue_empty_rcu(vq)) {
1288         goto done;
1289     }
1290     /* Needed after virtio_queue_empty(), see comment in
1291      * virtqueue_num_heads(). */
1292     smp_rmb();
1293 
1294     /* When we start there are no input or output descriptors yet. */
1295     out_num = in_num = elem_entries = 0;
1296 
1297     max = vq->vring.num;
1298 
1299     if (vq->inuse >= vq->vring.num) {
1300         virtio_error(vdev, "Virtqueue size exceeded");
1301         goto done;
1302     }
1303 
1304     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1305         goto done;
1306     }
1307 
1308     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1309         vring_set_avail_event(vq, vq->last_avail_idx);
1310     }
1311 
1312     i = head;
1313 
1314     caches = vring_get_region_caches(vq);
1315     if (caches->desc.len < max * sizeof(VRingDesc)) {
1316         virtio_error(vdev, "Cannot map descriptor ring");
1317         goto done;
1318     }
1319 
1320     desc_cache = &caches->desc;
1321     vring_split_desc_read(vdev, &desc, desc_cache, i);
1322     if (desc.flags & VRING_DESC_F_INDIRECT) {
1323         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1324             virtio_error(vdev, "Invalid size for indirect buffer table");
1325             goto done;
1326         }
1327 
1328         /* loop over the indirect descriptor table */
1329         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1330                                        desc.addr, desc.len, false);
1331         desc_cache = &indirect_desc_cache;
1332         if (len < desc.len) {
1333             virtio_error(vdev, "Cannot map indirect buffer");
1334             goto done;
1335         }
1336 
1337         max = desc.len / sizeof(VRingDesc);
1338         i = 0;
1339         vring_split_desc_read(vdev, &desc, desc_cache, i);
1340     }
1341 
1342     /* Collect all the descriptors */
1343     do {
1344         bool map_ok;
1345 
1346         if (desc.flags & VRING_DESC_F_WRITE) {
1347             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1348                                         iov + out_num,
1349                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1350                                         desc.addr, desc.len);
1351         } else {
1352             if (in_num) {
1353                 virtio_error(vdev, "Incorrect order for descriptors");
1354                 goto err_undo_map;
1355             }
1356             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1357                                         VIRTQUEUE_MAX_SIZE, false,
1358                                         desc.addr, desc.len);
1359         }
1360         if (!map_ok) {
1361             goto err_undo_map;
1362         }
1363 
1364         /* If we've got too many, that implies a descriptor loop. */
1365         if (++elem_entries > max) {
1366             virtio_error(vdev, "Looped descriptor");
1367             goto err_undo_map;
1368         }
1369 
1370         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1371     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1372 
1373     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1374         goto err_undo_map;
1375     }
1376 
1377     /* Now copy what we have collected and mapped */
1378     elem = virtqueue_alloc_element(sz, out_num, in_num);
1379     elem->index = head;
1380     elem->ndescs = 1;
1381     for (i = 0; i < out_num; i++) {
1382         elem->out_addr[i] = addr[i];
1383         elem->out_sg[i] = iov[i];
1384     }
1385     for (i = 0; i < in_num; i++) {
1386         elem->in_addr[i] = addr[out_num + i];
1387         elem->in_sg[i] = iov[out_num + i];
1388     }
1389 
1390     vq->inuse++;
1391 
1392     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1393 done:
1394     address_space_cache_destroy(&indirect_desc_cache);
1395     rcu_read_unlock();
1396 
1397     return elem;
1398 
1399 err_undo_map:
1400     virtqueue_undo_map_desc(out_num, in_num, iov);
1401     goto done;
1402 }
1403 
1404 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1405 {
1406     unsigned int i, max;
1407     VRingMemoryRegionCaches *caches;
1408     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1409     MemoryRegionCache *desc_cache;
1410     int64_t len;
1411     VirtIODevice *vdev = vq->vdev;
1412     VirtQueueElement *elem = NULL;
1413     unsigned out_num, in_num, elem_entries;
1414     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1415     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1416     VRingPackedDesc desc;
1417     uint16_t id;
1418     int rc;
1419 
1420     rcu_read_lock();
1421     if (virtio_queue_packed_empty_rcu(vq)) {
1422         goto done;
1423     }
1424 
1425     /* When we start there are no input or output descriptors yet. */
1426     out_num = in_num = elem_entries = 0;
1427 
1428     max = vq->vring.num;
1429 
1430     if (vq->inuse >= vq->vring.num) {
1431         virtio_error(vdev, "Virtqueue size exceeded");
1432         goto done;
1433     }
1434 
1435     i = vq->last_avail_idx;
1436 
1437     caches = vring_get_region_caches(vq);
1438     if (caches->desc.len < max * sizeof(VRingDesc)) {
1439         virtio_error(vdev, "Cannot map descriptor ring");
1440         goto done;
1441     }
1442 
1443     desc_cache = &caches->desc;
1444     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1445     id = desc.id;
1446     if (desc.flags & VRING_DESC_F_INDIRECT) {
1447         if (desc.len % sizeof(VRingPackedDesc)) {
1448             virtio_error(vdev, "Invalid size for indirect buffer table");
1449             goto done;
1450         }
1451 
1452         /* loop over the indirect descriptor table */
1453         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1454                                        desc.addr, desc.len, false);
1455         desc_cache = &indirect_desc_cache;
1456         if (len < desc.len) {
1457             virtio_error(vdev, "Cannot map indirect buffer");
1458             goto done;
1459         }
1460 
1461         max = desc.len / sizeof(VRingPackedDesc);
1462         i = 0;
1463         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1464     }
1465 
1466     /* Collect all the descriptors */
1467     do {
1468         bool map_ok;
1469 
1470         if (desc.flags & VRING_DESC_F_WRITE) {
1471             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1472                                         iov + out_num,
1473                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1474                                         desc.addr, desc.len);
1475         } else {
1476             if (in_num) {
1477                 virtio_error(vdev, "Incorrect order for descriptors");
1478                 goto err_undo_map;
1479             }
1480             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1481                                         VIRTQUEUE_MAX_SIZE, false,
1482                                         desc.addr, desc.len);
1483         }
1484         if (!map_ok) {
1485             goto err_undo_map;
1486         }
1487 
1488         /* If we've got too many, that implies a descriptor loop. */
1489         if (++elem_entries > max) {
1490             virtio_error(vdev, "Looped descriptor");
1491             goto err_undo_map;
1492         }
1493 
1494         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1495                                              desc_cache ==
1496                                              &indirect_desc_cache);
1497     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1498 
1499     /* Now copy what we have collected and mapped */
1500     elem = virtqueue_alloc_element(sz, out_num, in_num);
1501     for (i = 0; i < out_num; i++) {
1502         elem->out_addr[i] = addr[i];
1503         elem->out_sg[i] = iov[i];
1504     }
1505     for (i = 0; i < in_num; i++) {
1506         elem->in_addr[i] = addr[out_num + i];
1507         elem->in_sg[i] = iov[out_num + i];
1508     }
1509 
1510     elem->index = id;
1511     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1512     vq->last_avail_idx += elem->ndescs;
1513     vq->inuse += elem->ndescs;
1514 
1515     if (vq->last_avail_idx >= vq->vring.num) {
1516         vq->last_avail_idx -= vq->vring.num;
1517         vq->last_avail_wrap_counter ^= 1;
1518     }
1519 
1520     vq->shadow_avail_idx = vq->last_avail_idx;
1521     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1522 
1523     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1524 done:
1525     address_space_cache_destroy(&indirect_desc_cache);
1526     rcu_read_unlock();
1527 
1528     return elem;
1529 
1530 err_undo_map:
1531     virtqueue_undo_map_desc(out_num, in_num, iov);
1532     goto done;
1533 }
1534 
1535 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1536 {
1537     if (unlikely(vq->vdev->broken)) {
1538         return NULL;
1539     }
1540 
1541     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1542         return virtqueue_packed_pop(vq, sz);
1543     } else {
1544         return virtqueue_split_pop(vq, sz);
1545     }
1546 }
1547 
1548 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1549 {
1550     VRingMemoryRegionCaches *caches;
1551     MemoryRegionCache *desc_cache;
1552     unsigned int dropped = 0;
1553     VirtQueueElement elem = {};
1554     VirtIODevice *vdev = vq->vdev;
1555     VRingPackedDesc desc;
1556 
1557     caches = vring_get_region_caches(vq);
1558     desc_cache = &caches->desc;
1559 
1560     virtio_queue_set_notification(vq, 0);
1561 
1562     while (vq->inuse < vq->vring.num) {
1563         unsigned int idx = vq->last_avail_idx;
1564         /*
1565          * Works similarly to virtqueue_pop() but does not map buffers
1566          * and does not allocate any memory.
1567          */
1568         vring_packed_desc_read(vdev, &desc, desc_cache,
1569                                vq->last_avail_idx, true);
1570         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1571             break;
1572         }
1573         elem.index = desc.id;
1574         elem.ndescs = 1;
1575         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1576                                                vq->vring.num, &idx, false)) {
1577             ++elem.ndescs;
1578         }
1579         /*
1580          * immediately push the element, nothing to unmap
1581          * as both in_num and out_num are set to 0.
1582          */
1583         virtqueue_push(vq, &elem, 0);
1584         dropped++;
1585         vq->last_avail_idx += elem.ndescs;
1586         if (vq->last_avail_idx >= vq->vring.num) {
1587             vq->last_avail_idx -= vq->vring.num;
1588             vq->last_avail_wrap_counter ^= 1;
1589         }
1590     }
1591 
1592     return dropped;
1593 }
1594 
1595 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1596 {
1597     unsigned int dropped = 0;
1598     VirtQueueElement elem = {};
1599     VirtIODevice *vdev = vq->vdev;
1600     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1601 
1602     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1603         /* Works similarly to virtqueue_pop() but does not map buffers
1604          * and does not allocate any memory */
1605         smp_rmb();
1606         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1607             break;
1608         }
1609         vq->inuse++;
1610         vq->last_avail_idx++;
1611         if (fEventIdx) {
1612             vring_set_avail_event(vq, vq->last_avail_idx);
1613         }
1614         /* immediately push the element, nothing to unmap
1615          * as both in_num and out_num are set to 0 */
1616         virtqueue_push(vq, &elem, 0);
1617         dropped++;
1618     }
1619 
1620     return dropped;
1621 }
1622 
1623 /* virtqueue_drop_all:
1624  * @vq: The #VirtQueue
1625  * Drops all queued buffers and reports them to the guest
1626  * as completed. Useful when buffers cannot be
1627  * processed but must be returned to the guest.
1628  */
1629 unsigned int virtqueue_drop_all(VirtQueue *vq)
1630 {
1631     struct VirtIODevice *vdev = vq->vdev;
1632 
1633     if (unlikely(vdev->broken)) {
1634         return 0;
1635     }
1636 
1637     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1638         return virtqueue_packed_drop_all(vq);
1639     } else {
1640         return virtqueue_split_drop_all(vq);
1641     }
1642 }
1643 
1644 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1645  * it is what QEMU has always done by mistake.  We can change it sooner
1646  * or later by bumping the version number of the affected vm states.
1647  * In the meanwhile, since the in-memory layout of VirtQueueElement
1648  * has changed, we need to marshal to and from the layout that was
1649  * used before the change.
1650  */
1651 typedef struct VirtQueueElementOld {
1652     unsigned int index;
1653     unsigned int out_num;
1654     unsigned int in_num;
1655     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1656     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1657     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1658     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1659 } VirtQueueElementOld;
1660 
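/*
 * Load a VirtQueueElement that was saved in the old fixed-size layout and
 * remap its buffers; iov_base pointers are never migrated and are rebuilt
 * by virtqueue_map().
 */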
1661 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1662 {
1663     VirtQueueElement *elem;
1664     VirtQueueElementOld data;
1665     int i;
1666 
1667     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1668 
1669     /* TODO: teach all callers that this can fail, and return failure instead
1670      * of asserting here.
1671      * This is just one thing (there are probably more) that must be
1672      * fixed before we can allow NDEBUG compilation.
1673      */
1674     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1675     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1676 
1677     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1678     elem->index = data.index;
1679 
1680     for (i = 0; i < elem->in_num; i++) {
1681         elem->in_addr[i] = data.in_addr[i];
1682     }
1683 
1684     for (i = 0; i < elem->out_num; i++) {
1685         elem->out_addr[i] = data.out_addr[i];
1686     }
1687 
1688     for (i = 0; i < elem->in_num; i++) {
1689         /* Base is overwritten by virtqueue_map.  */
1690         elem->in_sg[i].iov_base = 0;
1691         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1692     }
1693 
1694     for (i = 0; i < elem->out_num; i++) {
1695         /* Base is overwritten by virtqueue_map.  */
1696         elem->out_sg[i].iov_base = 0;
1697         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1698     }
1699 
1700     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1701         qemu_get_be32s(f, &elem->ndescs);
1702     }
1703 
1704     virtqueue_map(vdev, elem);
1705     return elem;
1706 }
1707 
1708 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1709                                 VirtQueueElement *elem)
1710 {
1711     VirtQueueElementOld data;
1712     int i;
1713 
1714     memset(&data, 0, sizeof(data));
1715     data.index = elem->index;
1716     data.in_num = elem->in_num;
1717     data.out_num = elem->out_num;
1718 
1719     for (i = 0; i < elem->in_num; i++) {
1720         data.in_addr[i] = elem->in_addr[i];
1721     }
1722 
1723     for (i = 0; i < elem->out_num; i++) {
1724         data.out_addr[i] = elem->out_addr[i];
1725     }
1726 
1727     for (i = 0; i < elem->in_num; i++) {
1728         /* Base is overwritten by virtqueue_map when loading.  Do not
1729          * save it, as it would leak the QEMU address space layout.  */
1730         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1731     }
1732 
1733     for (i = 0; i < elem->out_num; i++) {
1734         /* Do not save iov_base as above.  */
1735         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1736     }
1737 
1738     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1739         qemu_put_be32s(f, &elem->ndescs);
1740     }
1741 
1742     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1743 }
1744 
1745 /* virtio device */
1746 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1747 {
1748     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1749     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1750 
1751     if (unlikely(vdev->broken)) {
1752         return;
1753     }
1754 
1755     if (k->notify) {
1756         k->notify(qbus->parent, vector);
1757     }
1758 }
1759 
1760 void virtio_update_irq(VirtIODevice *vdev)
1761 {
1762     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1763 }
1764 
1765 static int virtio_validate_features(VirtIODevice *vdev)
1766 {
1767     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1768 
1769     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1770         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1771         return -EFAULT;
1772     }
1773 
1774     if (k->validate_features) {
1775         return k->validate_features(vdev);
1776     } else {
1777         return 0;
1778     }
1779 }
1780 
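/*
 * Set the device status: validate features on the FEATURES_OK transition,
 * update the started state when DRIVER_OK toggles, and forward the new
 * status to the device class.
 */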
1781 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1782 {
1783     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1784     trace_virtio_set_status(vdev, val);
1785 
1786     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1787         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1788             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1789             int ret = virtio_validate_features(vdev);
1790 
1791             if (ret) {
1792                 return ret;
1793             }
1794         }
1795     }
1796 
1797     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1798         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1799         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1800     }
1801 
1802     if (k->set_status) {
1803         k->set_status(vdev, val);
1804     }
1805     vdev->status = val;
1806 
1807     return 0;
1808 }
1809 
1810 static enum virtio_device_endian virtio_default_endian(void)
1811 {
1812     if (target_words_bigendian()) {
1813         return VIRTIO_DEVICE_ENDIAN_BIG;
1814     } else {
1815         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1816     }
1817 }
1818 
1819 static enum virtio_device_endian virtio_current_cpu_endian(void)
1820 {
1821     CPUClass *cc = CPU_GET_CLASS(current_cpu);
1822 
1823     if (cc->virtio_is_big_endian(current_cpu)) {
1824         return VIRTIO_DEVICE_ENDIAN_BIG;
1825     } else {
1826         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1827     }
1828 }
1829 
1830 void virtio_reset(void *opaque)
1831 {
1832     VirtIODevice *vdev = opaque;
1833     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1834     int i;
1835 
1836     virtio_set_status(vdev, 0);
1837     if (current_cpu) {
1838         /* Guest initiated reset */
1839         vdev->device_endian = virtio_current_cpu_endian();
1840     } else {
1841         /* System reset */
1842         vdev->device_endian = virtio_default_endian();
1843     }
1844 
1845     if (k->reset) {
1846         k->reset(vdev);
1847     }
1848 
1849     vdev->start_on_kick = false;
1850     vdev->started = false;
1851     vdev->broken = false;
1852     vdev->guest_features = 0;
1853     vdev->queue_sel = 0;
1854     vdev->status = 0;
1855     atomic_set(&vdev->isr, 0);
1856     vdev->config_vector = VIRTIO_NO_VECTOR;
1857     virtio_notify_vector(vdev, vdev->config_vector);
1858 
1859     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1860         vdev->vq[i].vring.desc = 0;
1861         vdev->vq[i].vring.avail = 0;
1862         vdev->vq[i].vring.used = 0;
1863         vdev->vq[i].last_avail_idx = 0;
1864         vdev->vq[i].shadow_avail_idx = 0;
1865         vdev->vq[i].used_idx = 0;
1866         vdev->vq[i].last_avail_wrap_counter = true;
1867         vdev->vq[i].shadow_avail_wrap_counter = true;
1868         vdev->vq[i].used_wrap_counter = true;
1869         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
1870         vdev->vq[i].signalled_used = 0;
1871         vdev->vq[i].signalled_used_valid = false;
1872         vdev->vq[i].notification = true;
1873         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
1874         vdev->vq[i].inuse = 0;
1875         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
1876     }
1877 }
1878 
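/*
 * Legacy config space accessors (guest-endian).  Out-of-range reads return
 * all-ones and out-of-range writes are silently ignored.
 */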
1879 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
1880 {
1881     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1882     uint8_t val;
1883 
1884     if (addr + sizeof(val) > vdev->config_len) {
1885         return (uint32_t)-1;
1886     }
1887 
1888     k->get_config(vdev, vdev->config);
1889 
1890     val = ldub_p(vdev->config + addr);
1891     return val;
1892 }
1893 
1894 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
1895 {
1896     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1897     uint16_t val;
1898 
1899     if (addr + sizeof(val) > vdev->config_len) {
1900         return (uint32_t)-1;
1901     }
1902 
1903     k->get_config(vdev, vdev->config);
1904 
1905     val = lduw_p(vdev->config + addr);
1906     return val;
1907 }
1908 
1909 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1910 {
1911     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1912     uint32_t val;
1913 
1914     if (addr + sizeof(val) > vdev->config_len) {
1915         return (uint32_t)-1;
1916     }
1917 
1918     k->get_config(vdev, vdev->config);
1919 
1920     val = ldl_p(vdev->config + addr);
1921     return val;
1922 }
1923 
1924 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1925 {
1926     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1927     uint8_t val = data;
1928 
1929     if (addr + sizeof(val) > vdev->config_len) {
1930         return;
1931     }
1932 
1933     stb_p(vdev->config + addr, val);
1934 
1935     if (k->set_config) {
1936         k->set_config(vdev, vdev->config);
1937     }
1938 }
1939 
1940 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1941 {
1942     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1943     uint16_t val = data;
1944 
1945     if (addr + sizeof(val) > vdev->config_len) {
1946         return;
1947     }
1948 
1949     stw_p(vdev->config + addr, val);
1950 
1951     if (k->set_config) {
1952         k->set_config(vdev, vdev->config);
1953     }
1954 }
1955 
1956 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1957 {
1958     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1959     uint32_t val = data;
1960 
1961     if (addr + sizeof(val) > vdev->config_len) {
1962         return;
1963     }
1964 
1965     stl_p(vdev->config + addr, val);
1966 
1967     if (k->set_config) {
1968         k->set_config(vdev, vdev->config);
1969     }
1970 }
1971 
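/*
 * VIRTIO 1.0 ("modern") config space accessors: multi-byte fields are
 * always little-endian, independent of the guest's endianness.
 */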
1972 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1973 {
1974     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1975     uint8_t val;
1976 
1977     if (addr + sizeof(val) > vdev->config_len) {
1978         return (uint32_t)-1;
1979     }
1980 
1981     k->get_config(vdev, vdev->config);
1982 
1983     val = ldub_p(vdev->config + addr);
1984     return val;
1985 }
1986 
1987 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1988 {
1989     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1990     uint16_t val;
1991 
1992     if (addr + sizeof(val) > vdev->config_len) {
1993         return (uint32_t)-1;
1994     }
1995 
1996     k->get_config(vdev, vdev->config);
1997 
1998     val = lduw_le_p(vdev->config + addr);
1999     return val;
2000 }
2001 
2002 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2003 {
2004     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2005     uint32_t val;
2006 
2007     if (addr + sizeof(val) > vdev->config_len) {
2008         return (uint32_t)-1;
2009     }
2010 
2011     k->get_config(vdev, vdev->config);
2012 
2013     val = ldl_le_p(vdev->config + addr);
2014     return val;
2015 }
2016 
2017 void virtio_config_modern_writeb(VirtIODevice *vdev,
2018                                  uint32_t addr, uint32_t data)
2019 {
2020     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2021     uint8_t val = data;
2022 
2023     if (addr + sizeof(val) > vdev->config_len) {
2024         return;
2025     }
2026 
2027     stb_p(vdev->config + addr, val);
2028 
2029     if (k->set_config) {
2030         k->set_config(vdev, vdev->config);
2031     }
2032 }
2033 
2034 void virtio_config_modern_writew(VirtIODevice *vdev,
2035                                  uint32_t addr, uint32_t data)
2036 {
2037     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2038     uint16_t val = data;
2039 
2040     if (addr + sizeof(val) > vdev->config_len) {
2041         return;
2042     }
2043 
2044     stw_le_p(vdev->config + addr, val);
2045 
2046     if (k->set_config) {
2047         k->set_config(vdev, vdev->config);
2048     }
2049 }
2050 
2051 void virtio_config_modern_writel(VirtIODevice *vdev,
2052                                  uint32_t addr, uint32_t data)
2053 {
2054     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2055     uint32_t val = data;
2056 
2057     if (addr + sizeof(val) > vdev->config_len) {
2058         return;
2059     }
2060 
2061     stl_le_p(vdev->config + addr, val);
2062 
2063     if (k->set_config) {
2064         k->set_config(vdev, vdev->config);
2065     }
2066 }
2067 
2068 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2069 {
2070     if (!vdev->vq[n].vring.num) {
2071         return;
2072     }
2073     vdev->vq[n].vring.desc = addr;
2074     virtio_queue_update_rings(vdev, n);
2075 }
2076 
2077 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2078 {
2079     return vdev->vq[n].vring.desc;
2080 }
2081 
2082 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2083                             hwaddr avail, hwaddr used)
2084 {
2085     if (!vdev->vq[n].vring.num) {
2086         return;
2087     }
2088     vdev->vq[n].vring.desc = desc;
2089     vdev->vq[n].vring.avail = avail;
2090     vdev->vq[n].vring.used = used;
2091     virtio_init_region_cache(vdev, n);
2092 }
2093 
2094 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2095 {
2096     /* Don't allow guest to flip queue between existent and
2097      * nonexistent states, or to set it to an invalid size.
2098      */
2099     if (!!num != !!vdev->vq[n].vring.num ||
2100         num > VIRTQUEUE_MAX_SIZE ||
2101         num < 0) {
2102         return;
2103     }
2104     vdev->vq[n].vring.num = num;
2105 }
2106 
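/*
 * Iterate over the virtqueues currently mapped to a given MSI-X vector;
 * the per-vector lists are maintained by virtio_queue_set_vector().
 */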
2107 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2108 {
2109     return QLIST_FIRST(&vdev->vector_queues[vector]);
2110 }
2111 
2112 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2113 {
2114     return QLIST_NEXT(vq, node);
2115 }
2116 
2117 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2118 {
2119     return vdev->vq[n].vring.num;
2120 }
2121 
2122 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2123 {
2124     return vdev->vq[n].vring.num_default;
2125 }
2126 
2127 int virtio_get_num_queues(VirtIODevice *vdev)
2128 {
2129     int i;
2130 
2131     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2132         if (!virtio_queue_get_num(vdev, i)) {
2133             break;
2134         }
2135     }
2136 
2137     return i;
2138 }
2139 
2140 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2141 {
2142     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2143     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2144 
2145     /* virtio-1 compliant devices cannot change the alignment */
2146     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2147         error_report("tried to modify queue alignment for virtio-1 device");
2148         return;
2149     }
2150     /* Check that the transport told us it was going to do this
2151      * (so a buggy transport will immediately assert rather than
2152      * silently failing to migrate this state)
2153      */
2154     assert(k->has_variable_vring_alignment);
2155 
2156     if (align) {
2157         vdev->vq[n].vring.align = align;
2158         virtio_queue_update_rings(vdev, n);
2159     }
2160 }
2161 
2162 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2163 {
2164     bool ret = false;
2165 
2166     if (vq->vring.desc && vq->handle_aio_output) {
2167         VirtIODevice *vdev = vq->vdev;
2168 
2169         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2170         ret = vq->handle_aio_output(vdev, vq);
2171 
2172         if (unlikely(vdev->start_on_kick)) {
2173             virtio_set_started(vdev, true);
2174         }
2175     }
2176 
2177     return ret;
2178 }
2179 
2180 static void virtio_queue_notify_vq(VirtQueue *vq)
2181 {
2182     if (vq->vring.desc && vq->handle_output) {
2183         VirtIODevice *vdev = vq->vdev;
2184 
2185         if (unlikely(vdev->broken)) {
2186             return;
2187         }
2188 
2189         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2190         vq->handle_output(vdev, vq);
2191 
2192         if (unlikely(vdev->start_on_kick)) {
2193             virtio_set_started(vdev, true);
2194         }
2195     }
2196 }
2197 
2198 void virtio_queue_notify(VirtIODevice *vdev, int n)
2199 {
2200     VirtQueue *vq = &vdev->vq[n];
2201 
2202     if (unlikely(!vq->vring.desc || vdev->broken)) {
2203         return;
2204     }
2205 
2206     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2207     if (vq->handle_aio_output) {
2208         event_notifier_set(&vq->host_notifier);
2209     } else if (vq->handle_output) {
2210         vq->handle_output(vdev, vq);
2211 
2212         if (unlikely(vdev->start_on_kick)) {
2213             virtio_set_started(vdev, true);
2214         }
2215     }
2216 }
2217 
2218 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2219 {
2220     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2221         VIRTIO_NO_VECTOR;
2222 }
2223 
2224 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2225 {
2226     VirtQueue *vq = &vdev->vq[n];
2227 
2228     if (n < VIRTIO_QUEUE_MAX) {
2229         if (vdev->vector_queues &&
2230             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2231             QLIST_REMOVE(vq, node);
2232         }
2233         vdev->vq[n].vector = vector;
2234         if (vdev->vector_queues &&
2235             vector != VIRTIO_NO_VECTOR) {
2236             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2237         }
2238     }
2239 }
2240 
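/*
 * Allocate the first unused virtqueue slot and initialize it with the given
 * size and output handler.  Aborts if no slot is free or if queue_size
 * exceeds VIRTQUEUE_MAX_SIZE.
 */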
2241 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2242                             VirtIOHandleOutput handle_output)
2243 {
2244     int i;
2245 
2246     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2247         if (vdev->vq[i].vring.num == 0)
2248             break;
2249     }
2250 
2251     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2252         abort();
2253 
2254     vdev->vq[i].vring.num = queue_size;
2255     vdev->vq[i].vring.num_default = queue_size;
2256     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2257     vdev->vq[i].handle_output = handle_output;
2258     vdev->vq[i].handle_aio_output = NULL;
2259     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2260                                        queue_size);
2261 
2262     return &vdev->vq[i];
2263 }
2264 
2265 void virtio_del_queue(VirtIODevice *vdev, int n)
2266 {
2267     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2268         abort();
2269     }
2270 
2271     vdev->vq[n].vring.num = 0;
2272     vdev->vq[n].vring.num_default = 0;
2273     vdev->vq[n].handle_output = NULL;
2274     vdev->vq[n].handle_aio_output = NULL;
2275     g_free(vdev->vq[n].used_elems);
2276 }
2277 
2278 static void virtio_set_isr(VirtIODevice *vdev, int value)
2279 {
2280     uint8_t old = atomic_read(&vdev->isr);
2281 
2282     /* Do not write ISR if it does not change, so that its cacheline remains
2283      * shared in the common case where the guest does not read it.
2284      */
2285     if ((old & value) != value) {
2286         atomic_or(&vdev->isr, value);
2287     }
2288 }
2289 
2290 /* Called within rcu_read_lock().  */
2291 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2292 {
2293     uint16_t old, new;
2294     bool v;
2295     /* We need to expose used array entries before checking used event. */
2296     smp_mb();
2297     /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY) */
2298     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2299         !vq->inuse && virtio_queue_empty(vq)) {
2300         return true;
2301     }
2302 
2303     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2304         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2305     }
2306 
2307     v = vq->signalled_used_valid;
2308     vq->signalled_used_valid = true;
2309     old = vq->signalled_used;
2310     new = vq->signalled_used = vq->used_idx;
2311     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2312 }
2313 
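/*
 * Notify the guest via the irqfd/guest notifier path: raise ISR bit 0 (see
 * the Windows workaround note below) and signal the guest notifier eventfd.
 */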
2314 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2315 {
2316     bool should_notify;
2317     rcu_read_lock();
2318     should_notify = virtio_should_notify(vdev, vq);
2319     rcu_read_unlock();
2320 
2321     if (!should_notify) {
2322         return;
2323     }
2324 
2325     trace_virtio_notify_irqfd(vdev, vq);
2326 
2327     /*
2328      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2329      * Windows drivers included in virtio-win 1.8.0 (circa 2015)
2330      * incorrectly poll this bit during crashdump and hibernation in MSI
2331      * mode, causing a hang if this bit is never updated.  Recent
2332      * releases of Windows do not really shut down, but rather log out
2333      * and hibernate to make the next startup faster.  Hence, with those
2334      * drivers this manifested as a more serious hang during shutdown.
2335      *
2336      * The next driver release, from 2016, fixed this problem, so working
2337      * around it is not a must, but it's easy to do, so let's do it here.
2338      *
2339      * Note: it's safe to update ISR from any thread as it was switched
2340      * to an atomic operation.
2341      */
2342     virtio_set_isr(vq->vdev, 0x1);
2343     event_notifier_set(&vq->guest_notifier);
2344 }
2345 
2346 static void virtio_irq(VirtQueue *vq)
2347 {
2348     virtio_set_isr(vq->vdev, 0x1);
2349     virtio_notify_vector(vq->vdev, vq->vector);
2350 }
2351 
2352 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2353 {
2354     bool should_notify;
2355     rcu_read_lock();
2356     should_notify = virtio_should_notify(vdev, vq);
2357     rcu_read_unlock();
2358 
2359     if (!should_notify) {
2360         return;
2361     }
2362 
2363     trace_virtio_notify(vdev, vq);
2364     virtio_irq(vq);
2365 }
2366 
2367 void virtio_notify_config(VirtIODevice *vdev)
2368 {
2369     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2370         return;
2371 
2372     virtio_set_isr(vdev, 0x3);
2373     vdev->generation++;
2374     virtio_notify_vector(vdev, vdev->config_vector);
2375 }
2376 
2377 static bool virtio_device_endian_needed(void *opaque)
2378 {
2379     VirtIODevice *vdev = opaque;
2380 
2381     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2382     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2383         return vdev->device_endian != virtio_default_endian();
2384     }
2385     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2386     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2387 }
2388 
2389 static bool virtio_64bit_features_needed(void *opaque)
2390 {
2391     VirtIODevice *vdev = opaque;
2392 
2393     return (vdev->host_features >> 32) != 0;
2394 }
2395 
2396 static bool virtio_virtqueue_needed(void *opaque)
2397 {
2398     VirtIODevice *vdev = opaque;
2399 
2400     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2401 }
2402 
2403 static bool virtio_packed_virtqueue_needed(void *opaque)
2404 {
2405     VirtIODevice *vdev = opaque;
2406 
2407     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2408 }
2409 
2410 static bool virtio_ringsize_needed(void *opaque)
2411 {
2412     VirtIODevice *vdev = opaque;
2413     int i;
2414 
2415     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2416         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2417             return true;
2418         }
2419     }
2420     return false;
2421 }
2422 
2423 static bool virtio_extra_state_needed(void *opaque)
2424 {
2425     VirtIODevice *vdev = opaque;
2426     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2427     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2428 
2429     return k->has_extra_state &&
2430         k->has_extra_state(qbus->parent);
2431 }
2432 
2433 static bool virtio_broken_needed(void *opaque)
2434 {
2435     VirtIODevice *vdev = opaque;
2436 
2437     return vdev->broken;
2438 }
2439 
2440 static bool virtio_started_needed(void *opaque)
2441 {
2442     VirtIODevice *vdev = opaque;
2443 
2444     return vdev->started;
2445 }
2446 
2447 static const VMStateDescription vmstate_virtqueue = {
2448     .name = "virtqueue_state",
2449     .version_id = 1,
2450     .minimum_version_id = 1,
2451     .fields = (VMStateField[]) {
2452         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2453         VMSTATE_UINT64(vring.used, struct VirtQueue),
2454         VMSTATE_END_OF_LIST()
2455     }
2456 };
2457 
2458 static const VMStateDescription vmstate_packed_virtqueue = {
2459     .name = "packed_virtqueue_state",
2460     .version_id = 1,
2461     .minimum_version_id = 1,
2462     .fields = (VMStateField[]) {
2463         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2464         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2465         VMSTATE_UINT16(used_idx, struct VirtQueue),
2466         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2467         VMSTATE_UINT32(inuse, struct VirtQueue),
2468         VMSTATE_END_OF_LIST()
2469     }
2470 };
2471 
2472 static const VMStateDescription vmstate_virtio_virtqueues = {
2473     .name = "virtio/virtqueues",
2474     .version_id = 1,
2475     .minimum_version_id = 1,
2476     .needed = &virtio_virtqueue_needed,
2477     .fields = (VMStateField[]) {
2478         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2479                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2480         VMSTATE_END_OF_LIST()
2481     }
2482 };
2483 
2484 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2485     .name = "virtio/packed_virtqueues",
2486     .version_id = 1,
2487     .minimum_version_id = 1,
2488     .needed = &virtio_packed_virtqueue_needed,
2489     .fields = (VMStateField[]) {
2490         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2491                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2492         VMSTATE_END_OF_LIST()
2493     }
2494 };
2495 
2496 static const VMStateDescription vmstate_ringsize = {
2497     .name = "ringsize_state",
2498     .version_id = 1,
2499     .minimum_version_id = 1,
2500     .fields = (VMStateField[]) {
2501         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2502         VMSTATE_END_OF_LIST()
2503     }
2504 };
2505 
2506 static const VMStateDescription vmstate_virtio_ringsize = {
2507     .name = "virtio/ringsize",
2508     .version_id = 1,
2509     .minimum_version_id = 1,
2510     .needed = &virtio_ringsize_needed,
2511     .fields = (VMStateField[]) {
2512         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2513                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2514         VMSTATE_END_OF_LIST()
2515     }
2516 };
2517 
2518 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2519                            const VMStateField *field)
2520 {
2521     VirtIODevice *vdev = pv;
2522     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2523     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2524 
2525     if (!k->load_extra_state) {
2526         return -1;
2527     } else {
2528         return k->load_extra_state(qbus->parent, f);
2529     }
2530 }
2531 
2532 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2533                            const VMStateField *field, QJSON *vmdesc)
2534 {
2535     VirtIODevice *vdev = pv;
2536     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2537     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2538 
2539     k->save_extra_state(qbus->parent, f);
2540     return 0;
2541 }
2542 
2543 static const VMStateInfo vmstate_info_extra_state = {
2544     .name = "virtqueue_extra_state",
2545     .get = get_extra_state,
2546     .put = put_extra_state,
2547 };
2548 
2549 static const VMStateDescription vmstate_virtio_extra_state = {
2550     .name = "virtio/extra_state",
2551     .version_id = 1,
2552     .minimum_version_id = 1,
2553     .needed = &virtio_extra_state_needed,
2554     .fields = (VMStateField[]) {
2555         {
2556             .name         = "extra_state",
2557             .version_id   = 0,
2558             .field_exists = NULL,
2559             .size         = 0,
2560             .info         = &vmstate_info_extra_state,
2561             .flags        = VMS_SINGLE,
2562             .offset       = 0,
2563         },
2564         VMSTATE_END_OF_LIST()
2565     }
2566 };
2567 
2568 static const VMStateDescription vmstate_virtio_device_endian = {
2569     .name = "virtio/device_endian",
2570     .version_id = 1,
2571     .minimum_version_id = 1,
2572     .needed = &virtio_device_endian_needed,
2573     .fields = (VMStateField[]) {
2574         VMSTATE_UINT8(device_endian, VirtIODevice),
2575         VMSTATE_END_OF_LIST()
2576     }
2577 };
2578 
2579 static const VMStateDescription vmstate_virtio_64bit_features = {
2580     .name = "virtio/64bit_features",
2581     .version_id = 1,
2582     .minimum_version_id = 1,
2583     .needed = &virtio_64bit_features_needed,
2584     .fields = (VMStateField[]) {
2585         VMSTATE_UINT64(guest_features, VirtIODevice),
2586         VMSTATE_END_OF_LIST()
2587     }
2588 };
2589 
2590 static const VMStateDescription vmstate_virtio_broken = {
2591     .name = "virtio/broken",
2592     .version_id = 1,
2593     .minimum_version_id = 1,
2594     .needed = &virtio_broken_needed,
2595     .fields = (VMStateField[]) {
2596         VMSTATE_BOOL(broken, VirtIODevice),
2597         VMSTATE_END_OF_LIST()
2598     }
2599 };
2600 
2601 static const VMStateDescription vmstate_virtio_started = {
2602     .name = "virtio/started",
2603     .version_id = 1,
2604     .minimum_version_id = 1,
2605     .needed = &virtio_started_needed,
2606     .fields = (VMStateField[]) {
2607         VMSTATE_BOOL(started, VirtIODevice),
2608         VMSTATE_END_OF_LIST()
2609     }
2610 };
2611 
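/*
 * Top-level "virtio" vmstate.  The common fields are streamed by hand in
 * virtio_save()/virtio_load(); everything else lives in the optional
 * subsections above, gated by their .needed callbacks.
 */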
2612 static const VMStateDescription vmstate_virtio = {
2613     .name = "virtio",
2614     .version_id = 1,
2615     .minimum_version_id = 1,
2616     .minimum_version_id_old = 1,
2617     .fields = (VMStateField[]) {
2618         VMSTATE_END_OF_LIST()
2619     },
2620     .subsections = (const VMStateDescription*[]) {
2621         &vmstate_virtio_device_endian,
2622         &vmstate_virtio_64bit_features,
2623         &vmstate_virtio_virtqueues,
2624         &vmstate_virtio_ringsize,
2625         &vmstate_virtio_broken,
2626         &vmstate_virtio_extra_state,
2627         &vmstate_virtio_started,
2628         &vmstate_virtio_packed_virtqueues,
2629         NULL
2630     }
2631 };
2632 
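/*
 * Stream layout (kept for compatibility): transport config, common device
 * fields, per-queue state, transport per-queue state, device-specific state
 * (legacy ->save or vmsd), and finally the vmstate_virtio subsections.
 */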
2633 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2634 {
2635     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2636     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2637     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2638     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2639     int i;
2640 
2641     if (k->save_config) {
2642         k->save_config(qbus->parent, f);
2643     }
2644 
2645     qemu_put_8s(f, &vdev->status);
2646     qemu_put_8s(f, &vdev->isr);
2647     qemu_put_be16s(f, &vdev->queue_sel);
2648     qemu_put_be32s(f, &guest_features_lo);
2649     qemu_put_be32(f, vdev->config_len);
2650     qemu_put_buffer(f, vdev->config, vdev->config_len);
2651 
2652     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2653         if (vdev->vq[i].vring.num == 0)
2654             break;
2655     }
2656 
2657     qemu_put_be32(f, i);
2658 
2659     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2660         if (vdev->vq[i].vring.num == 0)
2661             break;
2662 
2663         qemu_put_be32(f, vdev->vq[i].vring.num);
2664         if (k->has_variable_vring_alignment) {
2665             qemu_put_be32(f, vdev->vq[i].vring.align);
2666         }
2667         /*
2668          * Save desc now; the rest of the ring addresses are saved in
2669          * subsections for VIRTIO-1 devices.
2670          */
2671         qemu_put_be64(f, vdev->vq[i].vring.desc);
2672         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2673         if (k->save_queue) {
2674             k->save_queue(qbus->parent, i, f);
2675         }
2676     }
2677 
2678     if (vdc->save != NULL) {
2679         vdc->save(vdev, f);
2680     }
2681 
2682     if (vdc->vmsd) {
2683         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2684         if (ret) {
2685             return ret;
2686         }
2687     }
2688 
2689     /* Subsections */
2690     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2691 }
2692 
2693 /* A wrapper for use as a VMState .put function */
2694 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2695                               const VMStateField *field, QJSON *vmdesc)
2696 {
2697     return virtio_save(VIRTIO_DEVICE(opaque), f);
2698 }
2699 
2700 /* A wrapper for use as a VMState .get function */
2701 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2702                              const VMStateField *field)
2703 {
2704     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2705     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2706 
2707     return virtio_load(vdev, f, dc->vmsd->version_id);
2708 }
2709 
2710 const VMStateInfo  virtio_vmstate_info = {
2711     .name = "virtio",
2712     .get = virtio_device_get,
2713     .put = virtio_device_put,
2714 };
2715 
2716 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2717 {
2718     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2719     bool bad = (val & ~(vdev->host_features)) != 0;
2720 
2721     val &= vdev->host_features;
2722     if (k->set_features) {
2723         k->set_features(vdev, val);
2724     }
2725     vdev->guest_features = val;
2726     return bad ? -1 : 0;
2727 }
2728 
2729 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2730 {
2731     int ret;
2732     /*
2733      * The driver must not attempt to set features after feature negotiation
2734      * has finished.
2735      */
2736     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2737         return -EINVAL;
2738     }
2739     ret = virtio_set_features_nocheck(vdev, val);
2740     if (!ret) {
2741         if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2742             /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2743             int i;
2744             for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2745                 if (vdev->vq[i].vring.num != 0) {
2746                     virtio_init_region_cache(vdev, i);
2747                 }
2748             }
2749         }
2750 
2751         if (!virtio_device_started(vdev, vdev->status) &&
2752             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2753             vdev->start_on_kick = true;
2754         }
2755     }
2756     return ret;
2757 }
2758 
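/*
 * Return the config space size implied by the enabled features: the largest
 * "end" offset among feature_sizes entries whose flag is set in
 * host_features.
 */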
2759 size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2760                                       uint64_t host_features)
2761 {
2762     size_t config_size = 0;
2763     int i;
2764 
2765     for (i = 0; feature_sizes[i].flags != 0; i++) {
2766         if (host_features & feature_sizes[i].flags) {
2767             config_size = MAX(feature_sizes[i].end, config_size);
2768         }
2769     }
2770 
2771     return config_size;
2772 }
2773 
2774 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2775 {
2776     int i, ret;
2777     int32_t config_len;
2778     uint32_t num;
2779     uint32_t features;
2780     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2781     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2782     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2783 
2784     /*
2785      * We poison the endianness to ensure it does not get used before
2786      * subsections have been loaded.
2787      */
2788     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2789 
2790     if (k->load_config) {
2791         ret = k->load_config(qbus->parent, f);
2792         if (ret)
2793             return ret;
2794     }
2795 
2796     qemu_get_8s(f, &vdev->status);
2797     qemu_get_8s(f, &vdev->isr);
2798     qemu_get_be16s(f, &vdev->queue_sel);
2799     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2800         return -1;
2801     }
2802     qemu_get_be32s(f, &features);
2803 
2804     /*
2805      * Temporarily set guest_features low bits - needed by
2806      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2807      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2808      *
2809      * Note: devices should always test host features in future - don't create
2810      * new dependencies like this.
2811      */
2812     vdev->guest_features = features;
2813 
2814     config_len = qemu_get_be32(f);
2815 
2816     /*
2817      * There are cases where the incoming config can be bigger or smaller
2818      * than what we have; so load what we have space for, and skip
2819      * any excess that's in the stream.
2820      */
2821     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2822 
2823     while (config_len > vdev->config_len) {
2824         qemu_get_byte(f);
2825         config_len--;
2826     }
2827 
2828     num = qemu_get_be32(f);
2829 
2830     if (num > VIRTIO_QUEUE_MAX) {
2831         error_report("Invalid number of virtqueues: 0x%x", num);
2832         return -1;
2833     }
2834 
2835     for (i = 0; i < num; i++) {
2836         vdev->vq[i].vring.num = qemu_get_be32(f);
2837         if (k->has_variable_vring_alignment) {
2838             vdev->vq[i].vring.align = qemu_get_be32(f);
2839         }
2840         vdev->vq[i].vring.desc = qemu_get_be64(f);
2841         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2842         vdev->vq[i].signalled_used_valid = false;
2843         vdev->vq[i].notification = true;
2844 
2845         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2846             error_report("VQ %d address 0x0 "
2847                          "inconsistent with Host index 0x%x",
2848                          i, vdev->vq[i].last_avail_idx);
2849             return -1;
2850         }
2851         if (k->load_queue) {
2852             ret = k->load_queue(qbus->parent, i, f);
2853             if (ret)
2854                 return ret;
2855         }
2856     }
2857 
2858     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2859 
2860     if (vdc->load != NULL) {
2861         ret = vdc->load(vdev, f, version_id);
2862         if (ret) {
2863             return ret;
2864         }
2865     }
2866 
2867     if (vdc->vmsd) {
2868         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
2869         if (ret) {
2870             return ret;
2871         }
2872     }
2873 
2874     /* Subsections */
2875     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
2876     if (ret) {
2877         return ret;
2878     }
2879 
2880     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
2881         vdev->device_endian = virtio_default_endian();
2882     }
2883 
2884     if (virtio_64bit_features_needed(vdev)) {
2885         /*
2886          * Subsection load filled vdev->guest_features.  Run them
2887          * through virtio_set_features to sanity-check them against
2888          * host_features.
2889          */
2890         uint64_t features64 = vdev->guest_features;
2891         if (virtio_set_features_nocheck(vdev, features64) < 0) {
2892             error_report("Features 0x%" PRIx64 " unsupported. "
2893                          "Allowed features: 0x%" PRIx64,
2894                          features64, vdev->host_features);
2895             return -1;
2896         }
2897     } else {
2898         if (virtio_set_features_nocheck(vdev, features) < 0) {
2899             error_report("Features 0x%x unsupported. "
2900                          "Allowed features: 0x%" PRIx64,
2901                          features, vdev->host_features);
2902             return -1;
2903         }
2904     }
2905 
2906     if (!virtio_device_started(vdev, vdev->status) &&
2907         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2908         vdev->start_on_kick = true;
2909     }
2910 
2911     rcu_read_lock();
2912     for (i = 0; i < num; i++) {
2913         if (vdev->vq[i].vring.desc) {
2914             uint16_t nheads;
2915 
2916             /*
2917              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
2918              * only the region cache needs to be set up.  Legacy devices need
2919              * to calculate used and avail ring addresses based on the desc
2920              * address.
2921              */
2922             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2923                 virtio_init_region_cache(vdev, i);
2924             } else {
2925                 virtio_queue_update_rings(vdev, i);
2926             }
2927 
2928             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2929                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
2930                 vdev->vq[i].shadow_avail_wrap_counter =
2931                                         vdev->vq[i].last_avail_wrap_counter;
2932                 continue;
2933             }
2934 
2935             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
2936             /* Check it isn't doing strange things with descriptor numbers. */
2937             if (nheads > vdev->vq[i].vring.num) {
2938                 error_report("VQ %d size 0x%x Guest index 0x%x "
2939                              "inconsistent with Host index 0x%x: delta 0x%x",
2940                              i, vdev->vq[i].vring.num,
2941                              vring_avail_idx(&vdev->vq[i]),
2942                              vdev->vq[i].last_avail_idx, nheads);
2943                 return -1;
2944             }
2945             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
2946             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
2947 
2948             /*
2949              * Some devices migrate VirtQueueElements that have been popped
2950              * from the avail ring but not yet returned to the used ring.
2951              * Since max ring size < UINT16_MAX it's safe to use modulo
2952              * UINT16_MAX + 1 subtraction.
2953              */
2954             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
2955                                 vdev->vq[i].used_idx);
2956             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
2957                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
2958                              "used_idx 0x%x",
2959                              i, vdev->vq[i].vring.num,
2960                              vdev->vq[i].last_avail_idx,
2961                              vdev->vq[i].used_idx);
2962                 return -1;
2963             }
2964         }
2965     }
2966     rcu_read_unlock();
2967 
2968     return 0;
2969 }
2970 
2971 void virtio_cleanup(VirtIODevice *vdev)
2972 {
2973     qemu_del_vm_change_state_handler(vdev->vmstate);
2974 }
2975 
2976 static void virtio_vmstate_change(void *opaque, int running, RunState state)
2977 {
2978     VirtIODevice *vdev = opaque;
2979     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2980     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2981     bool backend_run = running && virtio_device_started(vdev, vdev->status);
2982     vdev->vm_running = running;
2983 
2984     if (backend_run) {
2985         virtio_set_status(vdev, vdev->status);
2986     }
2987 
2988     if (k->vmstate_change) {
2989         k->vmstate_change(qbus->parent, backend_run);
2990     }
2991 
2992     if (!backend_run) {
2993         virtio_set_status(vdev, vdev->status);
2994     }
2995 }
2996 
2997 void virtio_instance_init_common(Object *proxy_obj, void *data,
2998                                  size_t vdev_size, const char *vdev_name)
2999 {
3000     DeviceState *vdev = data;
3001 
3002     object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size,
3003                             vdev_name, &error_abort, NULL);
3004     qdev_alias_all_properties(vdev, proxy_obj);
3005 }
3006 
3007 void virtio_init(VirtIODevice *vdev, const char *name,
3008                  uint16_t device_id, size_t config_size)
3009 {
3010     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3011     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3012     int i;
3013     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3014 
3015     if (nvectors) {
3016         vdev->vector_queues =
3017             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3018     }
3019 
3020     vdev->start_on_kick = false;
3021     vdev->started = false;
3022     vdev->device_id = device_id;
3023     vdev->status = 0;
3024     atomic_set(&vdev->isr, 0);
3025     vdev->queue_sel = 0;
3026     vdev->config_vector = VIRTIO_NO_VECTOR;
3027     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3028     vdev->vm_running = runstate_is_running();
3029     vdev->broken = false;
3030     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3031         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3032         vdev->vq[i].vdev = vdev;
3033         vdev->vq[i].queue_index = i;
3034     }
3035 
3036     vdev->name = name;
3037     vdev->config_len = config_size;
3038     if (vdev->config_len) {
3039         vdev->config = g_malloc0(config_size);
3040     } else {
3041         vdev->config = NULL;
3042     }
3043     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3044             virtio_vmstate_change, vdev);
3045     vdev->device_endian = virtio_default_endian();
3046     vdev->use_guest_notifier_mask = true;
3047 }
3048 
3049 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3050 {
3051     return vdev->vq[n].vring.desc;
3052 }
3053 
3054 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3055 {
3056     return virtio_queue_get_desc_addr(vdev, n) != 0;
3057 }
3058 
3059 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3060 {
3061     return vdev->vq[n].vring.avail;
3062 }
3063 
3064 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3065 {
3066     return vdev->vq[n].vring.used;
3067 }
3068 
3069 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3070 {
3071     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3072 }
3073 
3074 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3075 {
3076     int s;
3077 
3078     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3079         return sizeof(struct VRingPackedDescEvent);
3080     }
3081 
3082     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3083     return offsetof(VRingAvail, ring) +
3084         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3085 }
3086 
3087 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3088 {
3089     int s;
3090 
3091     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3092         return sizeof(struct VRingPackedDescEvent);
3093     }
3094 
3095     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3096     return offsetof(VRingUsed, ring) +
3097         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3098 }
3099 
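/*
 * For packed virtqueues, virtio_queue_get_last_avail_idx() packs both ring
 * positions into one value: bits 0-14 hold last_avail_idx and bit 15 its
 * wrap counter, while bits 16-30 hold used_idx and bit 31 its wrap counter.
 * virtio_queue_packed_set_last_avail_idx() below decodes the same layout.
 */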
3100 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3101                                                            int n)
3102 {
3103     unsigned int avail, used;
3104 
3105     avail = vdev->vq[n].last_avail_idx;
3106     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3107 
3108     used = vdev->vq[n].used_idx;
3109     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3110 
3111     return avail | used << 16;
3112 }
3113 
3114 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3115                                                       int n)
3116 {
3117     return vdev->vq[n].last_avail_idx;
3118 }
3119 
3120 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3121 {
3122     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3123         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3124     } else {
3125         return virtio_queue_split_get_last_avail_idx(vdev, n);
3126     }
3127 }
3128 
3129 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3130                                                    int n, unsigned int idx)
3131 {
3132     struct VirtQueue *vq = &vdev->vq[n];
3133 
3134     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3135     vq->last_avail_wrap_counter =
3136         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3137     idx >>= 16;
3138     vq->used_idx = idx & 0x7fff;
3139     vq->used_wrap_counter = !!(idx & 0x8000);
3140 }
3141 
3142 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3143                                                   int n, unsigned int idx)
3144 {
3145     vdev->vq[n].last_avail_idx = idx;
3146     vdev->vq[n].shadow_avail_idx = idx;
3147 }
3148 
3149 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3150                                      unsigned int idx)
3151 {
3152     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3153         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3154     } else {
3155         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3156     }
3157 }
3158 
3159 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3160                                                        int n)
3161 {
3162     /* Packed rings have no used idx in shared memory to restore from */
3163     return;
3164 }
3165 
3166 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3167                                                       int n)
3168 {
3169     rcu_read_lock();
3170     if (vdev->vq[n].vring.desc) {
3171         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3172         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3173     }
3174     rcu_read_unlock();
3175 }
3176 
3177 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3178 {
3179     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3180         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3181     } else {
3182         virtio_queue_split_restore_last_avail_idx(vdev, n);
3183     }
3184 }
3185 
3186 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3187 {
3188     /* used idx was updated through set_last_avail_idx() */
3189     return;
3190 }
3191 
3192 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3193 {
3194     rcu_read_lock();
3195     if (vdev->vq[n].vring.desc) {
3196         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3197     }
3198     rcu_read_unlock();
3199 }
3200 
3201 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3202 {
3203     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3204         return virtio_queue_packed_update_used_idx(vdev, n);
3205     } else {
3206         return virtio_queue_split_update_used_idx(vdev, n);
3207     }
3208 }
3209 
3210 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3211 {
3212     vdev->vq[n].signalled_used_valid = false;
3213 }
3214 
3215 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3216 {
3217     return vdev->vq + n;
3218 }
3219 
3220 uint16_t virtio_get_queue_index(VirtQueue *vq)
3221 {
3222     return vq->queue_index;
3223 }
3224 
3225 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3226 {
3227     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3228     if (event_notifier_test_and_clear(n)) {
3229         virtio_irq(vq);
3230     }
3231 }
3232 
3233 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3234                                                 bool with_irqfd)
3235 {
3236     if (assign && !with_irqfd) {
3237         event_notifier_set_handler(&vq->guest_notifier,
3238                                    virtio_queue_guest_notifier_read);
3239     } else {
3240         event_notifier_set_handler(&vq->guest_notifier, NULL);
3241     }
3242     if (!assign) {
3243         /* Test and clear notifier before closing it,
3244          * in case poll callback didn't have time to run. */
3245         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3246     }
3247 }
3248 
3249 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3250 {
3251     return &vq->guest_notifier;
3252 }
3253 
3254 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3255 {
3256     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3257     if (event_notifier_test_and_clear(n)) {
3258         virtio_queue_notify_aio_vq(vq);
3259     }
3260 }
3261 
3262 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3263 {
3264     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3265 
3266     virtio_queue_set_notification(vq, 0);
3267 }
3268 
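/*
 * AioContext poll callback: returns true if the handler made progress on a
 * non-empty queue.  Notifications stay disabled while polling and are
 * re-enabled by the poll_end callback below.
 */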
3269 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3270 {
3271     EventNotifier *n = opaque;
3272     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3273     bool progress;
3274 
3275     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3276         return false;
3277     }
3278 
3279     progress = virtio_queue_notify_aio_vq(vq);
3280 
3281     /* In case the handler function re-enabled notifications */
3282     virtio_queue_set_notification(vq, 0);
3283     return progress;
3284 }
3285 
3286 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3287 {
3288     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3289 
3290     /* Caller polls once more after this to catch requests that race with us */
3291     virtio_queue_set_notification(vq, 1);
3292 }
3293 
3294 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3295                                                 VirtIOHandleAIOOutput handle_output)
3296 {
3297     if (handle_output) {
3298         vq->handle_aio_output = handle_output;
3299         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3300                                virtio_queue_host_notifier_aio_read,
3301                                virtio_queue_host_notifier_aio_poll);
3302         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3303                                     virtio_queue_host_notifier_aio_poll_begin,
3304                                     virtio_queue_host_notifier_aio_poll_end);
3305     } else {
3306         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3307         /* Test and clear notifier after disabling event,
3308          * in case poll callback didn't have time to run. */
3309         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3310         vq->handle_aio_output = NULL;
3311     }
3312 }
3313 
3314 void virtio_queue_host_notifier_read(EventNotifier *n)
3315 {
3316     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3317     if (event_notifier_test_and_clear(n)) {
3318         virtio_queue_notify_vq(vq);
3319     }
3320 }
3321 
3322 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3323 {
3324     return &vq->host_notifier;
3325 }
3326 
3327 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3328                                       MemoryRegion *mr, bool assign)
3329 {
3330     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3331     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3332 
3333     if (k->set_host_notifier_mr) {
3334         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3335     }
3336 
3337     return -1;
3338 }
3339 
3340 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3341 {
3342     g_free(vdev->bus_name);
3343     vdev->bus_name = g_strdup(bus_name);
3344 }
3345 
3346 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3347 {
3348     va_list ap;
3349 
3350     va_start(ap, fmt);
3351     error_vreport(fmt, ap);
3352     va_end(ap);
3353 
3354     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3355         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3356         virtio_notify_config(vdev);
3357     }
3358 
3359     vdev->broken = true;
3360 }
3361 
3362 static void virtio_memory_listener_commit(MemoryListener *listener)
3363 {
3364     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3365     int i;
3366 
3367     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3368         if (vdev->vq[i].vring.num == 0) {
3369             break;
3370         }
3371         virtio_init_region_cache(vdev, i);
3372     }
3373 }
3374 
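/*
 * qdev realize hook: run the subclass realize method, plug the device into
 * its virtio transport, and register the memory listener that keeps the
 * vring region caches in sync with the guest memory map.
 */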
3375 static void virtio_device_realize(DeviceState *dev, Error **errp)
3376 {
3377     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3378     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3379     Error *err = NULL;
3380 
3381     /* Devices should either use vmsd or the load/save methods */
3382     assert(!vdc->vmsd || !vdc->load);
3383 
3384     if (vdc->realize != NULL) {
3385         vdc->realize(dev, &err);
3386         if (err != NULL) {
3387             error_propagate(errp, err);
3388             return;
3389         }
3390     }
3391 
3392     virtio_bus_device_plugged(vdev, &err);
3393     if (err != NULL) {
3394         error_propagate(errp, err);
3395         vdc->unrealize(dev, NULL);
3396         return;
3397     }
3398 
3399     vdev->listener.commit = virtio_memory_listener_commit;
3400     memory_listener_register(&vdev->listener, vdev->dma_as);
3401 }
3402 
3403 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
3404 {
3405     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3406     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3407     Error *err = NULL;
3408 
3409     virtio_bus_device_unplugged(vdev);
3410 
3411     if (vdc->unrealize != NULL) {
3412         vdc->unrealize(dev, &err);
3413         if (err != NULL) {
3414             error_propagate(errp, err);
3415             return;
3416         }
3417     }
3418 
3419     g_free(vdev->bus_name);
3420     vdev->bus_name = NULL;
3421 }
3422 
3423 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3424 {
3425     int i;
3426     if (!vdev->vq) {
3427         return;
3428     }
3429 
3430     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3431         if (vdev->vq[i].vring.num == 0) {
3432             break;
3433         }
3434         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3435     }
3436     g_free(vdev->vq);
3437 }
3438 
3439 static void virtio_device_instance_finalize(Object *obj)
3440 {
3441     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3442 
3443     memory_listener_unregister(&vdev->listener);
3444     virtio_device_free_virtqueues(vdev);
3445 
3446     g_free(vdev->config);
3447     g_free(vdev->vector_queues);
3448 }
3449 
3450 static Property virtio_properties[] = {
3451     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3452     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3453     DEFINE_PROP_END_OF_LIST(),
3454 };
3455 
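/*
 * Default start_ioeventfd implementation: within a single memory region
 * transaction, assign a host notifier to every configured queue and wire it
 * to virtio_queue_host_notifier_read, then kick each notifier so requests
 * already sitting in the vring are processed.  On failure, notifiers
 * assigned so far are deassigned inside the transaction and cleaned up once
 * it has committed.
 */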
3456 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3457 {
3458     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3459     int i, n, r, err;
3460 
3461     memory_region_transaction_begin();
3462     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3463         VirtQueue *vq = &vdev->vq[n];
3464         if (!virtio_queue_get_num(vdev, n)) {
3465             continue;
3466         }
3467         r = virtio_bus_set_host_notifier(qbus, n, true);
3468         if (r < 0) {
3469             err = r;
3470             goto assign_error;
3471         }
3472         event_notifier_set_handler(&vq->host_notifier,
3473                                    virtio_queue_host_notifier_read);
3474     }
3475 
3476     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3477         /* Kick right away to begin processing requests already in the vring */
3478         VirtQueue *vq = &vdev->vq[n];
3479         if (!vq->vring.num) {
3480             continue;
3481         }
3482         event_notifier_set(&vq->host_notifier);
3483     }
3484     memory_region_transaction_commit();
3485     return 0;
3486 
3487 assign_error:
3488     i = n; /* save n for a second pass after the transaction is committed. */
3489     while (--n >= 0) {
3490         VirtQueue *vq = &vdev->vq[n];
3491         if (!virtio_queue_get_num(vdev, n)) {
3492             continue;
3493         }
3494 
3495         event_notifier_set_handler(&vq->host_notifier, NULL);
3496         r = virtio_bus_set_host_notifier(qbus, n, false);
3497         assert(r >= 0);
3498     }
3499     memory_region_transaction_commit();
3500 
3501     while (--i >= 0) {
3502         if (!virtio_queue_get_num(vdev, i)) {
3503             continue;
3504         }
3505         virtio_bus_cleanup_host_notifier(qbus, i);
3506     }
3507     return err;
3508 }
3509 
3510 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3511 {
3512     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3513     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3514 
3515     return virtio_bus_start_ioeventfd(vbus);
3516 }
3517 
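/*
 * Default stop_ioeventfd implementation: detach the handlers and deassign
 * the host notifiers inside one memory region transaction, then clean the
 * notifiers up after the commit, mirroring the error path of the start
 * implementation above.
 */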
3518 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3519 {
3520     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3521     int n, r;
3522 
3523     memory_region_transaction_begin();
3524     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3525         VirtQueue *vq = &vdev->vq[n];
3526 
3527         if (!virtio_queue_get_num(vdev, n)) {
3528             continue;
3529         }
3530         event_notifier_set_handler(&vq->host_notifier, NULL);
3531         r = virtio_bus_set_host_notifier(qbus, n, false);
3532         assert(r >= 0);
3533     }
3534     memory_region_transaction_commit();
3535 
3536     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3537         if (!virtio_queue_get_num(vdev, n)) {
3538             continue;
3539         }
3540         virtio_bus_cleanup_host_notifier(qbus, n);
3541     }
3542 }
3543 
3544 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
3545 {
3546     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3547     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3548 
3549     virtio_bus_stop_ioeventfd(vbus);
3550 }
3551 
3552 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3553 {
3554     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3555     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3556 
3557     return virtio_bus_grab_ioeventfd(vbus);
3558 }
3559 
3560 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3561 {
3562     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3563     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3564 
3565     virtio_bus_release_ioeventfd(vbus);
3566 }
3567 
3568 static void virtio_device_class_init(ObjectClass *klass, void *data)
3569 {
3570     /* Set the class-wide defaults here. */
3571     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3572     DeviceClass *dc = DEVICE_CLASS(klass);
3573 
3574     dc->realize = virtio_device_realize;
3575     dc->unrealize = virtio_device_unrealize;
3576     dc->bus_type = TYPE_VIRTIO_BUS;
3577     dc->props = virtio_properties;
3578     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3579     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3580 
3581     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3582 }
3583 
3584 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3585 {
3586     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3587     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3588 
3589     return virtio_bus_ioeventfd_enabled(vbus);
3590 }
3591 
3592 static const TypeInfo virtio_device_info = {
3593     .name = TYPE_VIRTIO_DEVICE,
3594     .parent = TYPE_DEVICE,
3595     .instance_size = sizeof(VirtIODevice),
3596     .class_init = virtio_device_class_init,
3597     .instance_finalize = virtio_device_instance_finalize,
3598     .abstract = true,
3599     .class_size = sizeof(VirtioDeviceClass),
3600 };
3601 
3602 static void virtio_register_types(void)
3603 {
3604     type_register_static(&virtio_device_info);
3605 }
3606 
3607 type_init(virtio_register_types)
3608