xref: /openbmc/qemu/hw/virtio/virtio.c (revision f6822fee969aed8662baa4fdc38e6aeced3894ad)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qapi-commands-virtio.h"
17 #include "trace.h"
18 #include "qemu/defer-call.h"
19 #include "qemu/error-report.h"
20 #include "qemu/log.h"
21 #include "qemu/main-loop.h"
22 #include "qemu/module.h"
23 #include "qom/object_interfaces.h"
24 #include "hw/core/cpu.h"
25 #include "hw/virtio/virtio.h"
26 #include "hw/virtio/vhost.h"
27 #include "migration/qemu-file-types.h"
28 #include "qemu/atomic.h"
29 #include "hw/virtio/virtio-bus.h"
30 #include "hw/qdev-properties.h"
31 #include "hw/virtio/virtio-access.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/runstate.h"
34 #include "virtio-qmp.h"
35 
36 #include "standard-headers/linux/virtio_ids.h"
37 #include "standard-headers/linux/vhost_types.h"
38 #include "standard-headers/linux/virtio_blk.h"
39 #include "standard-headers/linux/virtio_console.h"
40 #include "standard-headers/linux/virtio_gpu.h"
41 #include "standard-headers/linux/virtio_net.h"
42 #include "standard-headers/linux/virtio_scsi.h"
43 #include "standard-headers/linux/virtio_i2c.h"
44 #include "standard-headers/linux/virtio_balloon.h"
45 #include "standard-headers/linux/virtio_iommu.h"
46 #include "standard-headers/linux/virtio_mem.h"
47 #include "standard-headers/linux/virtio_vsock.h"
48 
49 /*
50  * Maximum size of virtio device config space
51  */
52 #define VHOST_USER_MAX_CONFIG_SIZE 256
53 
/*
 * The alignment to use between consumer and producer parts of vring.
 * This is the x86 page size. It is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
59 #define VIRTIO_PCI_VRING_ALIGN         4096
60 
/*
 * One entry of the split-ring descriptor table.  vring_split_desc_read()
 * fills this from ring memory and converts every field with virtio_tswap*().
 */
typedef struct VRingDesc {
    uint64_t addr;   /* buffer address */
    uint32_t len;    /* buffer length */
    uint16_t flags;  /* descriptor flags */
    uint16_t next;   /* index of the chained descriptor */
} VRingDesc;
68 
/*
 * One entry of the packed-ring descriptor area.  The flags field is read
 * and written separately from the other fields by the
 * vring_packed_desc_*() helpers.
 */
typedef struct VRingPackedDesc
{
    uint64_t addr;   /* buffer address */
    uint32_t len;    /* buffer length */
    uint16_t id;     /* buffer id */
    uint16_t flags;  /* carries the AVAIL/USED bits tested by is_desc_avail() */
} VRingPackedDesc;
75 
/*
 * Layout of the split-ring "avail" area.  Only used for offsetof()
 * computations by the vring_avail_*() accessors.
 */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];  /* ring[num] is read by vring_get_used_event() */
} VRingAvail;
82 
/* One entry of the split-ring "used" area, written by vring_used_write(). */
typedef struct VRingUsedElem {
    uint32_t id;   /* index of the completed element */
    uint32_t len;  /* length reported for the element */
} VRingUsedElem;
88 
89 typedef struct VRingUsed
90 {
91     uint16_t flags;
92     uint16_t idx;
93     VRingUsedElem ring[];
94 } VRingUsed;
95 
96 typedef struct VRingMemoryRegionCaches {
97     struct rcu_head rcu;
98     MemoryRegionCache desc;
99     MemoryRegionCache avail;
100     MemoryRegionCache used;
101 } VRingMemoryRegionCaches;
102 
103 typedef struct VRing
104 {
105     unsigned int num;
106     unsigned int num_default;
107     unsigned int align;
108     hwaddr desc;
109     hwaddr avail;
110     hwaddr used;
111     VRingMemoryRegionCaches *caches;
112 } VRing;
113 
/*
 * Packed-ring event suppression structure, accessed through
 * vring_packed_event_read() and the vring_packed_*_write() helpers.
 */
typedef struct VRingPackedDescEvent
{
    uint16_t off_wrap;  /* descriptor offset with the wrap counter in bit 15 */
    uint16_t flags;     /* one of the VRING_PACKED_EVENT_FLAG_* values */
} VRingPackedDescEvent;
118 
119 struct VirtQueue
120 {
121     VRing vring;
122     VirtQueueElement *used_elems;
123 
124     /* Next head to pop */
125     uint16_t last_avail_idx;
126     bool last_avail_wrap_counter;
127 
128     /* Last avail_idx read from VQ. */
129     uint16_t shadow_avail_idx;
130     bool shadow_avail_wrap_counter;
131 
132     uint16_t used_idx;
133     bool used_wrap_counter;
134 
135     /* Last used index value we have signalled on */
136     uint16_t signalled_used;
137 
138     /* Last used index value we have signalled on */
139     bool signalled_used_valid;
140 
141     /* Notification enabled? */
142     bool notification;
143 
144     uint16_t queue_index;
145 
146     unsigned int inuse;
147 
148     uint16_t vector;
149     VirtIOHandleOutput handle_output;
150     VirtIODevice *vdev;
151     EventNotifier guest_notifier;
152     EventNotifier host_notifier;
153     bool host_notifier_enabled;
154     QLIST_ENTRY(VirtQueue) node;
155 };
156 
157 const char *virtio_device_names[] = {
158     [VIRTIO_ID_NET] = "virtio-net",
159     [VIRTIO_ID_BLOCK] = "virtio-blk",
160     [VIRTIO_ID_CONSOLE] = "virtio-serial",
161     [VIRTIO_ID_RNG] = "virtio-rng",
162     [VIRTIO_ID_BALLOON] = "virtio-balloon",
163     [VIRTIO_ID_IOMEM] = "virtio-iomem",
164     [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
165     [VIRTIO_ID_SCSI] = "virtio-scsi",
166     [VIRTIO_ID_9P] = "virtio-9p",
167     [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
168     [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
169     [VIRTIO_ID_CAIF] = "virtio-caif",
170     [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
171     [VIRTIO_ID_GPU] = "virtio-gpu",
172     [VIRTIO_ID_CLOCK] = "virtio-clk",
173     [VIRTIO_ID_INPUT] = "virtio-input",
174     [VIRTIO_ID_VSOCK] = "vhost-vsock",
175     [VIRTIO_ID_CRYPTO] = "virtio-crypto",
176     [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
177     [VIRTIO_ID_PSTORE] = "virtio-pstore",
178     [VIRTIO_ID_IOMMU] = "virtio-iommu",
179     [VIRTIO_ID_MEM] = "virtio-mem",
180     [VIRTIO_ID_SOUND] = "virtio-sound",
181     [VIRTIO_ID_FS] = "virtio-user-fs",
182     [VIRTIO_ID_PMEM] = "virtio-pmem",
183     [VIRTIO_ID_RPMB] = "virtio-rpmb",
184     [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
185     [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
186     [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
187     [VIRTIO_ID_SCMI] = "virtio-scmi",
188     [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
189     [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
190     [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
191     [VIRTIO_ID_CAN] = "virtio-can",
192     [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
193     [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
194     [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
195     [VIRTIO_ID_BT] = "virtio-bluetooth",
196     [VIRTIO_ID_GPIO] = "virtio-gpio"
197 };
198 
199 static const char *virtio_id_to_name(uint16_t device_id)
200 {
201     assert(device_id < G_N_ELEMENTS(virtio_device_names));
202     const char *name = virtio_device_names[device_id];
203     assert(name != NULL);
204     return name;
205 }
206 
207 /* Called within call_rcu().  */
208 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
209 {
210     assert(caches != NULL);
211     address_space_cache_destroy(&caches->desc);
212     address_space_cache_destroy(&caches->avail);
213     address_space_cache_destroy(&caches->used);
214     g_free(caches);
215 }
216 
217 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
218 {
219     VRingMemoryRegionCaches *caches;
220 
221     caches = qatomic_read(&vq->vring.caches);
222     qatomic_rcu_set(&vq->vring.caches, NULL);
223     if (caches) {
224         call_rcu(caches, virtio_free_region_cache, rcu);
225     }
226 }
227 
228 void virtio_init_region_cache(VirtIODevice *vdev, int n)
229 {
230     VirtQueue *vq = &vdev->vq[n];
231     VRingMemoryRegionCaches *old = vq->vring.caches;
232     VRingMemoryRegionCaches *new = NULL;
233     hwaddr addr, size;
234     int64_t len;
235     bool packed;
236 
237 
238     addr = vq->vring.desc;
239     if (!addr) {
240         goto out_no_cache;
241     }
242     new = g_new0(VRingMemoryRegionCaches, 1);
243     size = virtio_queue_get_desc_size(vdev, n);
244     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
245                                    true : false;
246     len = address_space_cache_init(&new->desc, vdev->dma_as,
247                                    addr, size, packed);
248     if (len < size) {
249         virtio_error(vdev, "Cannot map desc");
250         goto err_desc;
251     }
252 
253     size = virtio_queue_get_used_size(vdev, n);
254     len = address_space_cache_init(&new->used, vdev->dma_as,
255                                    vq->vring.used, size, true);
256     if (len < size) {
257         virtio_error(vdev, "Cannot map used");
258         goto err_used;
259     }
260 
261     size = virtio_queue_get_avail_size(vdev, n);
262     len = address_space_cache_init(&new->avail, vdev->dma_as,
263                                    vq->vring.avail, size, false);
264     if (len < size) {
265         virtio_error(vdev, "Cannot map avail");
266         goto err_avail;
267     }
268 
269     qatomic_rcu_set(&vq->vring.caches, new);
270     if (old) {
271         call_rcu(old, virtio_free_region_cache, rcu);
272     }
273     return;
274 
275 err_avail:
276     address_space_cache_destroy(&new->avail);
277 err_used:
278     address_space_cache_destroy(&new->used);
279 err_desc:
280     address_space_cache_destroy(&new->desc);
281 out_no_cache:
282     g_free(new);
283     virtio_virtqueue_reset_region_cache(vq);
284 }
285 
286 /* virt queue functions */
287 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
288 {
289     VRing *vring = &vdev->vq[n].vring;
290 
291     if (!vring->num || !vring->desc || !vring->align) {
292         /* not yet setup -> nothing to do */
293         return;
294     }
295     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
296     vring->used = vring_align(vring->avail +
297                               offsetof(VRingAvail, ring[vring->num]),
298                               vring->align);
299     virtio_init_region_cache(vdev, n);
300 }
301 
302 /* Called within rcu_read_lock().  */
303 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
304                                   MemoryRegionCache *cache, int i)
305 {
306     address_space_read_cached(cache, i * sizeof(VRingDesc),
307                               desc, sizeof(VRingDesc));
308     virtio_tswap64s(vdev, &desc->addr);
309     virtio_tswap32s(vdev, &desc->len);
310     virtio_tswap16s(vdev, &desc->flags);
311     virtio_tswap16s(vdev, &desc->next);
312 }
313 
314 static void vring_packed_event_read(VirtIODevice *vdev,
315                                     MemoryRegionCache *cache,
316                                     VRingPackedDescEvent *e)
317 {
318     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
319     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
320 
321     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
322     /* Make sure flags is seen before off_wrap */
323     smp_rmb();
324     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
325     virtio_tswap16s(vdev, &e->flags);
326 }
327 
328 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
329                                         MemoryRegionCache *cache,
330                                         uint16_t off_wrap)
331 {
332     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
333 
334     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
335     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
336 }
337 
338 static void vring_packed_flags_write(VirtIODevice *vdev,
339                                      MemoryRegionCache *cache, uint16_t flags)
340 {
341     hwaddr off = offsetof(VRingPackedDescEvent, flags);
342 
343     virtio_stw_phys_cached(vdev, cache, off, flags);
344     address_space_cache_invalidate(cache, off, sizeof(flags));
345 }
346 
347 /* Called within rcu_read_lock().  */
348 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
349 {
350     return qatomic_rcu_read(&vq->vring.caches);
351 }
352 
353 /* Called within rcu_read_lock().  */
354 static inline uint16_t vring_avail_flags(VirtQueue *vq)
355 {
356     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
357     hwaddr pa = offsetof(VRingAvail, flags);
358 
359     if (!caches) {
360         return 0;
361     }
362 
363     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
364 }
365 
366 /* Called within rcu_read_lock().  */
367 static inline uint16_t vring_avail_idx(VirtQueue *vq)
368 {
369     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
370     hwaddr pa = offsetof(VRingAvail, idx);
371 
372     if (!caches) {
373         return 0;
374     }
375 
376     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
377     return vq->shadow_avail_idx;
378 }
379 
380 /* Called within rcu_read_lock().  */
381 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
382 {
383     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
384     hwaddr pa = offsetof(VRingAvail, ring[i]);
385 
386     if (!caches) {
387         return 0;
388     }
389 
390     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
391 }
392 
393 /* Called within rcu_read_lock().  */
394 static inline uint16_t vring_get_used_event(VirtQueue *vq)
395 {
396     return vring_avail_ring(vq, vq->vring.num);
397 }
398 
399 /* Called within rcu_read_lock().  */
400 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
401                                     int i)
402 {
403     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
404     hwaddr pa = offsetof(VRingUsed, ring[i]);
405 
406     if (!caches) {
407         return;
408     }
409 
410     virtio_tswap32s(vq->vdev, &uelem->id);
411     virtio_tswap32s(vq->vdev, &uelem->len);
412     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
413     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
414 }
415 
416 /* Called within rcu_read_lock(). */
417 static inline uint16_t vring_used_flags(VirtQueue *vq)
418 {
419     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
420     hwaddr pa = offsetof(VRingUsed, flags);
421 
422     if (!caches) {
423         return 0;
424     }
425 
426     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
427 }
428 
429 /* Called within rcu_read_lock().  */
430 static uint16_t vring_used_idx(VirtQueue *vq)
431 {
432     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
433     hwaddr pa = offsetof(VRingUsed, idx);
434 
435     if (!caches) {
436         return 0;
437     }
438 
439     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
440 }
441 
442 /* Called within rcu_read_lock().  */
443 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
444 {
445     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
446     hwaddr pa = offsetof(VRingUsed, idx);
447 
448     if (caches) {
449         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
450         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
451     }
452 
453     vq->used_idx = val;
454 }
455 
456 /* Called within rcu_read_lock().  */
457 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
458 {
459     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
460     VirtIODevice *vdev = vq->vdev;
461     hwaddr pa = offsetof(VRingUsed, flags);
462     uint16_t flags;
463 
464     if (!caches) {
465         return;
466     }
467 
468     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
469     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
470     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
471 }
472 
473 /* Called within rcu_read_lock().  */
474 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
475 {
476     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
477     VirtIODevice *vdev = vq->vdev;
478     hwaddr pa = offsetof(VRingUsed, flags);
479     uint16_t flags;
480 
481     if (!caches) {
482         return;
483     }
484 
485     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
486     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
487     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
488 }
489 
490 /* Called within rcu_read_lock().  */
491 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
492 {
493     VRingMemoryRegionCaches *caches;
494     hwaddr pa;
495     if (!vq->notification) {
496         return;
497     }
498 
499     caches = vring_get_region_caches(vq);
500     if (!caches) {
501         return;
502     }
503 
504     pa = offsetof(VRingUsed, ring[vq->vring.num]);
505     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
506     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
507 }
508 
509 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
510 {
511     RCU_READ_LOCK_GUARD();
512 
513     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
514         vring_set_avail_event(vq, vring_avail_idx(vq));
515     } else if (enable) {
516         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
517     } else {
518         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
519     }
520     if (enable) {
521         /* Expose avail event/used flags before caller checks the avail idx. */
522         smp_mb();
523     }
524 }
525 
526 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
527 {
528     uint16_t off_wrap;
529     VRingPackedDescEvent e;
530     VRingMemoryRegionCaches *caches;
531 
532     RCU_READ_LOCK_GUARD();
533     caches = vring_get_region_caches(vq);
534     if (!caches) {
535         return;
536     }
537 
538     vring_packed_event_read(vq->vdev, &caches->used, &e);
539 
540     if (!enable) {
541         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
542     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
543         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
544         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
545         /* Make sure off_wrap is wrote before flags */
546         smp_wmb();
547         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
548     } else {
549         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
550     }
551 
552     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
553     if (enable) {
554         /* Expose avail event/used flags before caller checks the avail idx. */
555         smp_mb();
556     }
557 }
558 
559 bool virtio_queue_get_notification(VirtQueue *vq)
560 {
561     return vq->notification;
562 }
563 
564 void virtio_queue_set_notification(VirtQueue *vq, int enable)
565 {
566     vq->notification = enable;
567 
568     if (!vq->vring.desc) {
569         return;
570     }
571 
572     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
573         virtio_queue_packed_set_notification(vq, enable);
574     } else {
575         virtio_queue_split_set_notification(vq, enable);
576     }
577 }
578 
579 int virtio_queue_ready(VirtQueue *vq)
580 {
581     return vq->vring.avail != 0;
582 }
583 
584 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
585                                          uint16_t *flags,
586                                          MemoryRegionCache *cache,
587                                          int i)
588 {
589     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
590 
591     *flags = virtio_lduw_phys_cached(vdev, cache, off);
592 }
593 
594 static void vring_packed_desc_read(VirtIODevice *vdev,
595                                    VRingPackedDesc *desc,
596                                    MemoryRegionCache *cache,
597                                    int i, bool strict_order)
598 {
599     hwaddr off = i * sizeof(VRingPackedDesc);
600 
601     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
602 
603     if (strict_order) {
604         /* Make sure flags is read before the rest fields. */
605         smp_rmb();
606     }
607 
608     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
609                               &desc->addr, sizeof(desc->addr));
610     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
611                               &desc->id, sizeof(desc->id));
612     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
613                               &desc->len, sizeof(desc->len));
614     virtio_tswap64s(vdev, &desc->addr);
615     virtio_tswap16s(vdev, &desc->id);
616     virtio_tswap32s(vdev, &desc->len);
617 }
618 
619 static void vring_packed_desc_write_data(VirtIODevice *vdev,
620                                          VRingPackedDesc *desc,
621                                          MemoryRegionCache *cache,
622                                          int i)
623 {
624     hwaddr off_id = i * sizeof(VRingPackedDesc) +
625                     offsetof(VRingPackedDesc, id);
626     hwaddr off_len = i * sizeof(VRingPackedDesc) +
627                     offsetof(VRingPackedDesc, len);
628 
629     virtio_tswap32s(vdev, &desc->len);
630     virtio_tswap16s(vdev, &desc->id);
631     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
632     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
633     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
634     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
635 }
636 
637 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
638                                           VRingPackedDesc *desc,
639                                           MemoryRegionCache *cache,
640                                           int i)
641 {
642     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
643 
644     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
645     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
646 }
647 
648 static void vring_packed_desc_write(VirtIODevice *vdev,
649                                     VRingPackedDesc *desc,
650                                     MemoryRegionCache *cache,
651                                     int i, bool strict_order)
652 {
653     vring_packed_desc_write_data(vdev, desc, cache, i);
654     if (strict_order) {
655         /* Make sure data is wrote before flags. */
656         smp_wmb();
657     }
658     vring_packed_desc_write_flags(vdev, desc, cache, i);
659 }
660 
661 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
662 {
663     bool avail, used;
664 
665     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
666     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
667     return (avail != used) && (avail == wrap_counter);
668 }
669 
670 /* Fetch avail_idx from VQ memory only when we really need to know if
671  * guest has added some buffers.
672  * Called within rcu_read_lock().  */
673 static int virtio_queue_empty_rcu(VirtQueue *vq)
674 {
675     if (virtio_device_disabled(vq->vdev)) {
676         return 1;
677     }
678 
679     if (unlikely(!vq->vring.avail)) {
680         return 1;
681     }
682 
683     if (vq->shadow_avail_idx != vq->last_avail_idx) {
684         return 0;
685     }
686 
687     return vring_avail_idx(vq) == vq->last_avail_idx;
688 }
689 
690 static int virtio_queue_split_empty(VirtQueue *vq)
691 {
692     bool empty;
693 
694     if (virtio_device_disabled(vq->vdev)) {
695         return 1;
696     }
697 
698     if (unlikely(!vq->vring.avail)) {
699         return 1;
700     }
701 
702     if (vq->shadow_avail_idx != vq->last_avail_idx) {
703         return 0;
704     }
705 
706     RCU_READ_LOCK_GUARD();
707     empty = vring_avail_idx(vq) == vq->last_avail_idx;
708     return empty;
709 }
710 
711 /* Called within rcu_read_lock().  */
712 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
713 {
714     struct VRingPackedDesc desc;
715     VRingMemoryRegionCaches *cache;
716 
717     if (unlikely(!vq->vring.desc)) {
718         return 1;
719     }
720 
721     cache = vring_get_region_caches(vq);
722     if (!cache) {
723         return 1;
724     }
725 
726     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
727                                  vq->last_avail_idx);
728 
729     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
730 }
731 
732 static int virtio_queue_packed_empty(VirtQueue *vq)
733 {
734     RCU_READ_LOCK_GUARD();
735     return virtio_queue_packed_empty_rcu(vq);
736 }
737 
738 int virtio_queue_empty(VirtQueue *vq)
739 {
740     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
741         return virtio_queue_packed_empty(vq);
742     } else {
743         return virtio_queue_split_empty(vq);
744     }
745 }
746 
747 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
748                                unsigned int len)
749 {
750     AddressSpace *dma_as = vq->vdev->dma_as;
751     unsigned int offset;
752     int i;
753 
754     offset = 0;
755     for (i = 0; i < elem->in_num; i++) {
756         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
757 
758         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
759                          elem->in_sg[i].iov_len,
760                          DMA_DIRECTION_FROM_DEVICE, size);
761 
762         offset += size;
763     }
764 
765     for (i = 0; i < elem->out_num; i++)
766         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
767                          elem->out_sg[i].iov_len,
768                          DMA_DIRECTION_TO_DEVICE,
769                          elem->out_sg[i].iov_len);
770 }
771 
772 /* virtqueue_detach_element:
773  * @vq: The #VirtQueue
774  * @elem: The #VirtQueueElement
775  * @len: number of bytes written
776  *
777  * Detach the element from the virtqueue.  This function is suitable for device
778  * reset or other situations where a #VirtQueueElement is simply freed and will
779  * not be pushed or discarded.
780  */
781 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
782                               unsigned int len)
783 {
784     vq->inuse -= elem->ndescs;
785     virtqueue_unmap_sg(vq, elem, len);
786 }
787 
788 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
789 {
790     vq->last_avail_idx -= num;
791 }
792 
793 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
794 {
795     if (vq->last_avail_idx < num) {
796         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
797         vq->last_avail_wrap_counter ^= 1;
798     } else {
799         vq->last_avail_idx -= num;
800     }
801 }
802 
803 /* virtqueue_unpop:
804  * @vq: The #VirtQueue
805  * @elem: The #VirtQueueElement
806  * @len: number of bytes written
807  *
808  * Pretend the most recent element wasn't popped from the virtqueue.  The next
809  * call to virtqueue_pop() will refetch the element.
810  */
811 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
812                      unsigned int len)
813 {
814 
815     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
816         virtqueue_packed_rewind(vq, 1);
817     } else {
818         virtqueue_split_rewind(vq, 1);
819     }
820 
821     virtqueue_detach_element(vq, elem, len);
822 }
823 
824 /* virtqueue_rewind:
825  * @vq: The #VirtQueue
826  * @num: Number of elements to push back
827  *
828  * Pretend that elements weren't popped from the virtqueue.  The next
829  * virtqueue_pop() will refetch the oldest element.
830  *
831  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
832  *
833  * Returns: true on success, false if @num is greater than the number of in use
834  * elements.
835  */
836 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
837 {
838     if (num > vq->inuse) {
839         return false;
840     }
841 
842     vq->inuse -= num;
843     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
844         virtqueue_packed_rewind(vq, num);
845     } else {
846         virtqueue_split_rewind(vq, num);
847     }
848     return true;
849 }
850 
851 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
852                     unsigned int len, unsigned int idx)
853 {
854     VRingUsedElem uelem;
855 
856     if (unlikely(!vq->vring.used)) {
857         return;
858     }
859 
860     idx = (idx + vq->used_idx) % vq->vring.num;
861 
862     uelem.id = elem->index;
863     uelem.len = len;
864     vring_used_write(vq, &uelem, idx);
865 }
866 
867 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
868                                   unsigned int len, unsigned int idx)
869 {
870     vq->used_elems[idx].index = elem->index;
871     vq->used_elems[idx].len = len;
872     vq->used_elems[idx].ndescs = elem->ndescs;
873 }
874 
875 static void virtqueue_packed_fill_desc(VirtQueue *vq,
876                                        const VirtQueueElement *elem,
877                                        unsigned int idx,
878                                        bool strict_order)
879 {
880     uint16_t head;
881     VRingMemoryRegionCaches *caches;
882     VRingPackedDesc desc = {
883         .id = elem->index,
884         .len = elem->len,
885     };
886     bool wrap_counter = vq->used_wrap_counter;
887 
888     if (unlikely(!vq->vring.desc)) {
889         return;
890     }
891 
892     head = vq->used_idx + idx;
893     if (head >= vq->vring.num) {
894         head -= vq->vring.num;
895         wrap_counter ^= 1;
896     }
897     if (wrap_counter) {
898         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
899         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
900     } else {
901         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
902         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
903     }
904 
905     caches = vring_get_region_caches(vq);
906     if (!caches) {
907         return;
908     }
909 
910     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
911 }
912 
913 /* Called within rcu_read_lock().  */
914 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len, unsigned int idx)
916 {
917     trace_virtqueue_fill(vq, elem, len, idx);
918 
919     virtqueue_unmap_sg(vq, elem, len);
920 
921     if (virtio_device_disabled(vq->vdev)) {
922         return;
923     }
924 
925     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
926         virtqueue_packed_fill(vq, elem, len, idx);
927     } else {
928         virtqueue_split_fill(vq, elem, len, idx);
929     }
930 }
931 
932 /* Called within rcu_read_lock().  */
933 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
934 {
935     uint16_t old, new;
936 
937     if (unlikely(!vq->vring.used)) {
938         return;
939     }
940 
941     /* Make sure buffer is written before we update index. */
942     smp_wmb();
943     trace_virtqueue_flush(vq, count);
944     old = vq->used_idx;
945     new = old + count;
946     vring_used_idx_set(vq, new);
947     vq->inuse -= count;
948     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
949         vq->signalled_used_valid = false;
950 }
951 
952 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
953 {
954     unsigned int i, ndescs = 0;
955 
956     if (unlikely(!vq->vring.desc)) {
957         return;
958     }
959 
960     for (i = 1; i < count; i++) {
961         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
962         ndescs += vq->used_elems[i].ndescs;
963     }
964     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
965     ndescs += vq->used_elems[0].ndescs;
966 
967     vq->inuse -= ndescs;
968     vq->used_idx += ndescs;
969     if (vq->used_idx >= vq->vring.num) {
970         vq->used_idx -= vq->vring.num;
971         vq->used_wrap_counter ^= 1;
972         vq->signalled_used_valid = false;
973     }
974 }
975 
976 void virtqueue_flush(VirtQueue *vq, unsigned int count)
977 {
978     if (virtio_device_disabled(vq->vdev)) {
979         vq->inuse -= count;
980         return;
981     }
982 
983     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
984         virtqueue_packed_flush(vq, count);
985     } else {
986         virtqueue_split_flush(vq, count);
987     }
988 }
989 
990 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
991                     unsigned int len)
992 {
993     RCU_READ_LOCK_GUARD();
994     virtqueue_fill(vq, elem, len, 0);
995     virtqueue_flush(vq, 1);
996 }
997 
998 /* Called within rcu_read_lock().  */
999 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1000 {
1001     uint16_t avail_idx, num_heads;
1002 
1003     /* Use shadow index whenever possible. */
1004     avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
1005                                               : vring_avail_idx(vq);
1006     num_heads = avail_idx - idx;
1007 
1008     /* Check it isn't doing very strange things with descriptor numbers. */
1009     if (num_heads > vq->vring.num) {
1010         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1011                      idx, vq->shadow_avail_idx);
1012         return -EINVAL;
1013     }
1014     /*
1015      * On success, callers read a descriptor at vq->last_avail_idx.
1016      * Make sure descriptor read does not bypass avail index read.
1017      *
1018      * This is necessary even if we are using a shadow index, since
1019      * the shadow index could have been initialized by calling
1020      * vring_avail_idx() outside of this function, i.e., by a guest
1021      * memory read not accompanied by a barrier.
1022      */
1023     if (num_heads) {
1024         smp_rmb();
1025     }
1026 
1027     return num_heads;
1028 }
1029 
1030 /* Called within rcu_read_lock().  */
1031 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1032                                unsigned int *head)
1033 {
1034     /* Grab the next descriptor number they're advertising, and increment
1035      * the index we've seen. */
1036     *head = vring_avail_ring(vq, idx % vq->vring.num);
1037 
1038     /* If their number is silly, that's a fatal mistake. */
1039     if (*head >= vq->vring.num) {
1040         virtio_error(vq->vdev, "Guest says index %u is available", *head);
1041         return false;
1042     }
1043 
1044     return true;
1045 }
1046 
/* Result codes of the virtqueue_*_read_next_desc() helpers. */
enum {
    /* the next descriptor could not be read */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
1052 
1053 /* Reads the 'desc->next' descriptor into '*desc'. */
1054 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1055                                           MemoryRegionCache *desc_cache,
1056                                           unsigned int max)
1057 {
1058     /* If this descriptor says it doesn't chain, we're done. */
1059     if (!(desc->flags & VRING_DESC_F_NEXT)) {
1060         return VIRTQUEUE_READ_DESC_DONE;
1061     }
1062 
1063     /* Check they're not leading us off end of descriptors. */
1064     if (desc->next >= max) {
1065         virtio_error(vdev, "Desc next is %u", desc->next);
1066         return VIRTQUEUE_READ_DESC_ERROR;
1067     }
1068 
1069     vring_split_desc_read(vdev, desc, desc_cache, desc->next);
1070     return VIRTQUEUE_READ_DESC_MORE;
1071 }
1072 
1073 /* Called within rcu_read_lock().  */
1074 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1075                             unsigned int *in_bytes, unsigned int *out_bytes,
1076                             unsigned max_in_bytes, unsigned max_out_bytes,
1077                             VRingMemoryRegionCaches *caches)
1078 {
1079     VirtIODevice *vdev = vq->vdev;
1080     unsigned int idx;
1081     unsigned int total_bufs, in_total, out_total;
1082     MemoryRegionCache indirect_desc_cache;
1083     int64_t len = 0;
1084     int rc;
1085 
1086     address_space_cache_init_empty(&indirect_desc_cache);
1087 
1088     idx = vq->last_avail_idx;
1089     total_bufs = in_total = out_total = 0;
1090 
1091     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1092         MemoryRegionCache *desc_cache = &caches->desc;
1093         unsigned int num_bufs;
1094         VRingDesc desc;
1095         unsigned int i;
1096         unsigned int max = vq->vring.num;
1097 
1098         num_bufs = total_bufs;
1099 
1100         if (!virtqueue_get_head(vq, idx++, &i)) {
1101             goto err;
1102         }
1103 
1104         vring_split_desc_read(vdev, &desc, desc_cache, i);
1105 
1106         if (desc.flags & VRING_DESC_F_INDIRECT) {
1107             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1108                 virtio_error(vdev, "Invalid size for indirect buffer table");
1109                 goto err;
1110             }
1111 
1112             /* If we've got too many, that implies a descriptor loop. */
1113             if (num_bufs >= max) {
1114                 virtio_error(vdev, "Looped descriptor");
1115                 goto err;
1116             }
1117 
1118             /* loop over the indirect descriptor table */
1119             len = address_space_cache_init(&indirect_desc_cache,
1120                                            vdev->dma_as,
1121                                            desc.addr, desc.len, false);
1122             desc_cache = &indirect_desc_cache;
1123             if (len < desc.len) {
1124                 virtio_error(vdev, "Cannot map indirect buffer");
1125                 goto err;
1126             }
1127 
1128             max = desc.len / sizeof(VRingDesc);
1129             num_bufs = i = 0;
1130             vring_split_desc_read(vdev, &desc, desc_cache, i);
1131         }
1132 
1133         do {
1134             /* If we've got too many, that implies a descriptor loop. */
1135             if (++num_bufs > max) {
1136                 virtio_error(vdev, "Looped descriptor");
1137                 goto err;
1138             }
1139 
1140             if (desc.flags & VRING_DESC_F_WRITE) {
1141                 in_total += desc.len;
1142             } else {
1143                 out_total += desc.len;
1144             }
1145             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1146                 goto done;
1147             }
1148 
1149             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1150         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1151 
1152         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1153             goto err;
1154         }
1155 
1156         if (desc_cache == &indirect_desc_cache) {
1157             address_space_cache_destroy(&indirect_desc_cache);
1158             total_bufs++;
1159         } else {
1160             total_bufs = num_bufs;
1161         }
1162     }
1163 
1164     if (rc < 0) {
1165         goto err;
1166     }
1167 
1168 done:
1169     address_space_cache_destroy(&indirect_desc_cache);
1170     if (in_bytes) {
1171         *in_bytes = in_total;
1172     }
1173     if (out_bytes) {
1174         *out_bytes = out_total;
1175     }
1176     return;
1177 
1178 err:
1179     in_total = out_total = 0;
1180     goto done;
1181 }
1182 
1183 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1184                                            VRingPackedDesc *desc,
1185                                            MemoryRegionCache
1186                                            *desc_cache,
1187                                            unsigned int max,
1188                                            unsigned int *next,
1189                                            bool indirect)
1190 {
1191     /* If this descriptor says it doesn't chain, we're done. */
1192     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1193         return VIRTQUEUE_READ_DESC_DONE;
1194     }
1195 
1196     ++*next;
1197     if (*next == max) {
1198         if (indirect) {
1199             return VIRTQUEUE_READ_DESC_DONE;
1200         } else {
1201             (*next) -= vq->vring.num;
1202         }
1203     }
1204 
1205     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1206     return VIRTQUEUE_READ_DESC_MORE;
1207 }
1208 
1209 /* Called within rcu_read_lock().  */
1210 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1211                                              unsigned int *in_bytes,
1212                                              unsigned int *out_bytes,
1213                                              unsigned max_in_bytes,
1214                                              unsigned max_out_bytes,
1215                                              VRingMemoryRegionCaches *caches)
1216 {
1217     VirtIODevice *vdev = vq->vdev;
1218     unsigned int idx;
1219     unsigned int total_bufs, in_total, out_total;
1220     MemoryRegionCache indirect_desc_cache;
1221     MemoryRegionCache *desc_cache;
1222     int64_t len = 0;
1223     VRingPackedDesc desc;
1224     bool wrap_counter;
1225 
1226     address_space_cache_init_empty(&indirect_desc_cache);
1227 
1228     idx = vq->last_avail_idx;
1229     wrap_counter = vq->last_avail_wrap_counter;
1230     total_bufs = in_total = out_total = 0;
1231 
1232     for (;;) {
1233         unsigned int num_bufs = total_bufs;
1234         unsigned int i = idx;
1235         int rc;
1236         unsigned int max = vq->vring.num;
1237 
1238         desc_cache = &caches->desc;
1239 
1240         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1241         if (!is_desc_avail(desc.flags, wrap_counter)) {
1242             break;
1243         }
1244 
1245         if (desc.flags & VRING_DESC_F_INDIRECT) {
1246             if (desc.len % sizeof(VRingPackedDesc)) {
1247                 virtio_error(vdev, "Invalid size for indirect buffer table");
1248                 goto err;
1249             }
1250 
1251             /* If we've got too many, that implies a descriptor loop. */
1252             if (num_bufs >= max) {
1253                 virtio_error(vdev, "Looped descriptor");
1254                 goto err;
1255             }
1256 
1257             /* loop over the indirect descriptor table */
1258             len = address_space_cache_init(&indirect_desc_cache,
1259                                            vdev->dma_as,
1260                                            desc.addr, desc.len, false);
1261             desc_cache = &indirect_desc_cache;
1262             if (len < desc.len) {
1263                 virtio_error(vdev, "Cannot map indirect buffer");
1264                 goto err;
1265             }
1266 
1267             max = desc.len / sizeof(VRingPackedDesc);
1268             num_bufs = i = 0;
1269             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1270         }
1271 
1272         do {
1273             /* If we've got too many, that implies a descriptor loop. */
1274             if (++num_bufs > max) {
1275                 virtio_error(vdev, "Looped descriptor");
1276                 goto err;
1277             }
1278 
1279             if (desc.flags & VRING_DESC_F_WRITE) {
1280                 in_total += desc.len;
1281             } else {
1282                 out_total += desc.len;
1283             }
1284             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1285                 goto done;
1286             }
1287 
1288             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1289                                                  &i, desc_cache ==
1290                                                  &indirect_desc_cache);
1291         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1292 
1293         if (desc_cache == &indirect_desc_cache) {
1294             address_space_cache_destroy(&indirect_desc_cache);
1295             total_bufs++;
1296             idx++;
1297         } else {
1298             idx += num_bufs - total_bufs;
1299             total_bufs = num_bufs;
1300         }
1301 
1302         if (idx >= vq->vring.num) {
1303             idx -= vq->vring.num;
1304             wrap_counter ^= 1;
1305         }
1306     }
1307 
1308     /* Record the index and wrap counter for a kick we want */
1309     vq->shadow_avail_idx = idx;
1310     vq->shadow_avail_wrap_counter = wrap_counter;
1311 done:
1312     address_space_cache_destroy(&indirect_desc_cache);
1313     if (in_bytes) {
1314         *in_bytes = in_total;
1315     }
1316     if (out_bytes) {
1317         *out_bytes = out_total;
1318     }
1319     return;
1320 
1321 err:
1322     in_total = out_total = 0;
1323     goto done;
1324 }
1325 
1326 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1327                                unsigned int *out_bytes,
1328                                unsigned max_in_bytes, unsigned max_out_bytes)
1329 {
1330     uint16_t desc_size;
1331     VRingMemoryRegionCaches *caches;
1332 
1333     RCU_READ_LOCK_GUARD();
1334 
1335     if (unlikely(!vq->vring.desc)) {
1336         goto err;
1337     }
1338 
1339     caches = vring_get_region_caches(vq);
1340     if (!caches) {
1341         goto err;
1342     }
1343 
1344     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1345                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1346     if (caches->desc.len < vq->vring.num * desc_size) {
1347         virtio_error(vq->vdev, "Cannot map descriptor ring");
1348         goto err;
1349     }
1350 
1351     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1352         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1353                                          max_in_bytes, max_out_bytes,
1354                                          caches);
1355     } else {
1356         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1357                                         max_in_bytes, max_out_bytes,
1358                                         caches);
1359     }
1360 
1361     return;
1362 err:
1363     if (in_bytes) {
1364         *in_bytes = 0;
1365     }
1366     if (out_bytes) {
1367         *out_bytes = 0;
1368     }
1369 }
1370 
1371 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1372                           unsigned int out_bytes)
1373 {
1374     unsigned int in_total, out_total;
1375 
1376     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1377     return in_bytes <= in_total && out_bytes <= out_total;
1378 }
1379 
1380 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1381                                hwaddr *addr, struct iovec *iov,
1382                                unsigned int max_num_sg, bool is_write,
1383                                hwaddr pa, size_t sz)
1384 {
1385     bool ok = false;
1386     unsigned num_sg = *p_num_sg;
1387     assert(num_sg <= max_num_sg);
1388 
1389     if (!sz) {
1390         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1391         goto out;
1392     }
1393 
1394     while (sz) {
1395         hwaddr len = sz;
1396 
1397         if (num_sg == max_num_sg) {
1398             virtio_error(vdev, "virtio: too many write descriptors in "
1399                                "indirect table");
1400             goto out;
1401         }
1402 
1403         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1404                                               is_write ?
1405                                               DMA_DIRECTION_FROM_DEVICE :
1406                                               DMA_DIRECTION_TO_DEVICE,
1407                                               MEMTXATTRS_UNSPECIFIED);
1408         if (!iov[num_sg].iov_base) {
1409             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1410             goto out;
1411         }
1412 
1413         iov[num_sg].iov_len = len;
1414         addr[num_sg] = pa;
1415 
1416         sz -= len;
1417         pa += len;
1418         num_sg++;
1419     }
1420     ok = true;
1421 
1422 out:
1423     *p_num_sg = num_sg;
1424     return ok;
1425 }
1426 
/*
 * Only used by error code paths before we have a VirtQueueElement
 * (therefore virtqueue_unmap_sg() can't be used).  Assumes buffers weren't
 * written to yet.
 *
 * @iov holds @out_num entries followed by @in_num entries; the latter are
 * unmapped with is_write set.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int total = out_num + in_num;
    unsigned int n;

    for (n = 0; n < total; n++) {
        int is_write = n >= out_num;

        cpu_physical_memory_unmap(iov[n].iov_base, iov[n].iov_len,
                                  is_write, 0);
    }
}
1443 
1444 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1445                                 hwaddr *addr, unsigned int num_sg,
1446                                 bool is_write)
1447 {
1448     unsigned int i;
1449     hwaddr len;
1450 
1451     for (i = 0; i < num_sg; i++) {
1452         len = sg[i].iov_len;
1453         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1454                                         addr[i], &len, is_write ?
1455                                         DMA_DIRECTION_FROM_DEVICE :
1456                                         DMA_DIRECTION_TO_DEVICE,
1457                                         MEMTXATTRS_UNSPECIFIED);
1458         if (!sg[i].iov_base) {
1459             error_report("virtio: error trying to map MMIO memory");
1460             exit(1);
1461         }
1462         if (len != sg[i].iov_len) {
1463             error_report("virtio: unexpected memory split");
1464             exit(1);
1465         }
1466     }
1467 }
1468 
1469 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1470 {
1471     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1472     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1473                                                                         false);
1474 }
1475 
1476 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1477 {
1478     VirtQueueElement *elem;
1479     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1480     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1481     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1482     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1483     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1484     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1485 
1486     assert(sz >= sizeof(VirtQueueElement));
1487     elem = g_malloc(out_sg_end);
1488     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1489     elem->out_num = out_num;
1490     elem->in_num = in_num;
1491     elem->in_addr = (void *)elem + in_addr_ofs;
1492     elem->out_addr = (void *)elem + out_addr_ofs;
1493     elem->in_sg = (void *)elem + in_sg_ofs;
1494     elem->out_sg = (void *)elem + out_sg_ofs;
1495     return elem;
1496 }
1497 
1498 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1499 {
1500     unsigned int i, head, max;
1501     VRingMemoryRegionCaches *caches;
1502     MemoryRegionCache indirect_desc_cache;
1503     MemoryRegionCache *desc_cache;
1504     int64_t len;
1505     VirtIODevice *vdev = vq->vdev;
1506     VirtQueueElement *elem = NULL;
1507     unsigned out_num, in_num, elem_entries;
1508     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1509     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1510     VRingDesc desc;
1511     int rc;
1512 
1513     address_space_cache_init_empty(&indirect_desc_cache);
1514 
1515     RCU_READ_LOCK_GUARD();
1516     if (virtio_queue_empty_rcu(vq)) {
1517         goto done;
1518     }
1519     /* Needed after virtio_queue_empty(), see comment in
1520      * virtqueue_num_heads(). */
1521     smp_rmb();
1522 
1523     /* When we start there are none of either input nor output. */
1524     out_num = in_num = elem_entries = 0;
1525 
1526     max = vq->vring.num;
1527 
1528     if (vq->inuse >= vq->vring.num) {
1529         virtio_error(vdev, "Virtqueue size exceeded");
1530         goto done;
1531     }
1532 
1533     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1534         goto done;
1535     }
1536 
1537     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1538         vring_set_avail_event(vq, vq->last_avail_idx);
1539     }
1540 
1541     i = head;
1542 
1543     caches = vring_get_region_caches(vq);
1544     if (!caches) {
1545         virtio_error(vdev, "Region caches not initialized");
1546         goto done;
1547     }
1548 
1549     if (caches->desc.len < max * sizeof(VRingDesc)) {
1550         virtio_error(vdev, "Cannot map descriptor ring");
1551         goto done;
1552     }
1553 
1554     desc_cache = &caches->desc;
1555     vring_split_desc_read(vdev, &desc, desc_cache, i);
1556     if (desc.flags & VRING_DESC_F_INDIRECT) {
1557         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1558             virtio_error(vdev, "Invalid size for indirect buffer table");
1559             goto done;
1560         }
1561 
1562         /* loop over the indirect descriptor table */
1563         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1564                                        desc.addr, desc.len, false);
1565         desc_cache = &indirect_desc_cache;
1566         if (len < desc.len) {
1567             virtio_error(vdev, "Cannot map indirect buffer");
1568             goto done;
1569         }
1570 
1571         max = desc.len / sizeof(VRingDesc);
1572         i = 0;
1573         vring_split_desc_read(vdev, &desc, desc_cache, i);
1574     }
1575 
1576     /* Collect all the descriptors */
1577     do {
1578         bool map_ok;
1579 
1580         if (desc.flags & VRING_DESC_F_WRITE) {
1581             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1582                                         iov + out_num,
1583                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1584                                         desc.addr, desc.len);
1585         } else {
1586             if (in_num) {
1587                 virtio_error(vdev, "Incorrect order for descriptors");
1588                 goto err_undo_map;
1589             }
1590             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1591                                         VIRTQUEUE_MAX_SIZE, false,
1592                                         desc.addr, desc.len);
1593         }
1594         if (!map_ok) {
1595             goto err_undo_map;
1596         }
1597 
1598         /* If we've got too many, that implies a descriptor loop. */
1599         if (++elem_entries > max) {
1600             virtio_error(vdev, "Looped descriptor");
1601             goto err_undo_map;
1602         }
1603 
1604         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1605     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1606 
1607     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1608         goto err_undo_map;
1609     }
1610 
1611     /* Now copy what we have collected and mapped */
1612     elem = virtqueue_alloc_element(sz, out_num, in_num);
1613     elem->index = head;
1614     elem->ndescs = 1;
1615     for (i = 0; i < out_num; i++) {
1616         elem->out_addr[i] = addr[i];
1617         elem->out_sg[i] = iov[i];
1618     }
1619     for (i = 0; i < in_num; i++) {
1620         elem->in_addr[i] = addr[out_num + i];
1621         elem->in_sg[i] = iov[out_num + i];
1622     }
1623 
1624     vq->inuse++;
1625 
1626     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1627 done:
1628     address_space_cache_destroy(&indirect_desc_cache);
1629 
1630     return elem;
1631 
1632 err_undo_map:
1633     virtqueue_undo_map_desc(out_num, in_num, iov);
1634     goto done;
1635 }
1636 
1637 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1638 {
1639     unsigned int i, max;
1640     VRingMemoryRegionCaches *caches;
1641     MemoryRegionCache indirect_desc_cache;
1642     MemoryRegionCache *desc_cache;
1643     int64_t len;
1644     VirtIODevice *vdev = vq->vdev;
1645     VirtQueueElement *elem = NULL;
1646     unsigned out_num, in_num, elem_entries;
1647     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1648     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1649     VRingPackedDesc desc;
1650     uint16_t id;
1651     int rc;
1652 
1653     address_space_cache_init_empty(&indirect_desc_cache);
1654 
1655     RCU_READ_LOCK_GUARD();
1656     if (virtio_queue_packed_empty_rcu(vq)) {
1657         goto done;
1658     }
1659 
1660     /* When we start there are none of either input nor output. */
1661     out_num = in_num = elem_entries = 0;
1662 
1663     max = vq->vring.num;
1664 
1665     if (vq->inuse >= vq->vring.num) {
1666         virtio_error(vdev, "Virtqueue size exceeded");
1667         goto done;
1668     }
1669 
1670     i = vq->last_avail_idx;
1671 
1672     caches = vring_get_region_caches(vq);
1673     if (!caches) {
1674         virtio_error(vdev, "Region caches not initialized");
1675         goto done;
1676     }
1677 
1678     if (caches->desc.len < max * sizeof(VRingDesc)) {
1679         virtio_error(vdev, "Cannot map descriptor ring");
1680         goto done;
1681     }
1682 
1683     desc_cache = &caches->desc;
1684     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1685     id = desc.id;
1686     if (desc.flags & VRING_DESC_F_INDIRECT) {
1687         if (desc.len % sizeof(VRingPackedDesc)) {
1688             virtio_error(vdev, "Invalid size for indirect buffer table");
1689             goto done;
1690         }
1691 
1692         /* loop over the indirect descriptor table */
1693         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1694                                        desc.addr, desc.len, false);
1695         desc_cache = &indirect_desc_cache;
1696         if (len < desc.len) {
1697             virtio_error(vdev, "Cannot map indirect buffer");
1698             goto done;
1699         }
1700 
1701         max = desc.len / sizeof(VRingPackedDesc);
1702         i = 0;
1703         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1704     }
1705 
1706     /* Collect all the descriptors */
1707     do {
1708         bool map_ok;
1709 
1710         if (desc.flags & VRING_DESC_F_WRITE) {
1711             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1712                                         iov + out_num,
1713                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1714                                         desc.addr, desc.len);
1715         } else {
1716             if (in_num) {
1717                 virtio_error(vdev, "Incorrect order for descriptors");
1718                 goto err_undo_map;
1719             }
1720             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1721                                         VIRTQUEUE_MAX_SIZE, false,
1722                                         desc.addr, desc.len);
1723         }
1724         if (!map_ok) {
1725             goto err_undo_map;
1726         }
1727 
1728         /* If we've got too many, that implies a descriptor loop. */
1729         if (++elem_entries > max) {
1730             virtio_error(vdev, "Looped descriptor");
1731             goto err_undo_map;
1732         }
1733 
1734         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1735                                              desc_cache ==
1736                                              &indirect_desc_cache);
1737     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1738 
1739     /* Now copy what we have collected and mapped */
1740     elem = virtqueue_alloc_element(sz, out_num, in_num);
1741     for (i = 0; i < out_num; i++) {
1742         elem->out_addr[i] = addr[i];
1743         elem->out_sg[i] = iov[i];
1744     }
1745     for (i = 0; i < in_num; i++) {
1746         elem->in_addr[i] = addr[out_num + i];
1747         elem->in_sg[i] = iov[out_num + i];
1748     }
1749 
1750     elem->index = id;
1751     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1752     vq->last_avail_idx += elem->ndescs;
1753     vq->inuse += elem->ndescs;
1754 
1755     if (vq->last_avail_idx >= vq->vring.num) {
1756         vq->last_avail_idx -= vq->vring.num;
1757         vq->last_avail_wrap_counter ^= 1;
1758     }
1759 
1760     vq->shadow_avail_idx = vq->last_avail_idx;
1761     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1762 
1763     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1764 done:
1765     address_space_cache_destroy(&indirect_desc_cache);
1766 
1767     return elem;
1768 
1769 err_undo_map:
1770     virtqueue_undo_map_desc(out_num, in_num, iov);
1771     goto done;
1772 }
1773 
1774 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1775 {
1776     if (virtio_device_disabled(vq->vdev)) {
1777         return NULL;
1778     }
1779 
1780     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1781         return virtqueue_packed_pop(vq, sz);
1782     } else {
1783         return virtqueue_split_pop(vq, sz);
1784     }
1785 }
1786 
1787 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1788 {
1789     VRingMemoryRegionCaches *caches;
1790     MemoryRegionCache *desc_cache;
1791     unsigned int dropped = 0;
1792     VirtQueueElement elem = {};
1793     VirtIODevice *vdev = vq->vdev;
1794     VRingPackedDesc desc;
1795 
1796     RCU_READ_LOCK_GUARD();
1797 
1798     caches = vring_get_region_caches(vq);
1799     if (!caches) {
1800         return 0;
1801     }
1802 
1803     desc_cache = &caches->desc;
1804 
1805     virtio_queue_set_notification(vq, 0);
1806 
1807     while (vq->inuse < vq->vring.num) {
1808         unsigned int idx = vq->last_avail_idx;
1809         /*
1810          * works similar to virtqueue_pop but does not map buffers
1811          * and does not allocate any memory.
1812          */
1813         vring_packed_desc_read(vdev, &desc, desc_cache,
1814                                vq->last_avail_idx , true);
1815         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1816             break;
1817         }
1818         elem.index = desc.id;
1819         elem.ndescs = 1;
1820         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1821                                                vq->vring.num, &idx, false)) {
1822             ++elem.ndescs;
1823         }
1824         /*
1825          * immediately push the element, nothing to unmap
1826          * as both in_num and out_num are set to 0.
1827          */
1828         virtqueue_push(vq, &elem, 0);
1829         dropped++;
1830         vq->last_avail_idx += elem.ndescs;
1831         if (vq->last_avail_idx >= vq->vring.num) {
1832             vq->last_avail_idx -= vq->vring.num;
1833             vq->last_avail_wrap_counter ^= 1;
1834         }
1835     }
1836 
1837     return dropped;
1838 }
1839 
1840 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1841 {
1842     unsigned int dropped = 0;
1843     VirtQueueElement elem = {};
1844     VirtIODevice *vdev = vq->vdev;
1845     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1846 
1847     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1848         /* works similar to virtqueue_pop but does not map buffers
1849         * and does not allocate any memory */
1850         smp_rmb();
1851         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1852             break;
1853         }
1854         vq->inuse++;
1855         vq->last_avail_idx++;
1856         if (fEventIdx) {
1857             vring_set_avail_event(vq, vq->last_avail_idx);
1858         }
1859         /* immediately push the element, nothing to unmap
1860          * as both in_num and out_num are set to 0 */
1861         virtqueue_push(vq, &elem, 0);
1862         dropped++;
1863     }
1864 
1865     return dropped;
1866 }
1867 
1868 /* virtqueue_drop_all:
1869  * @vq: The #VirtQueue
1870  * Drops all queued buffers and indicates them to the guest
1871  * as if they are done. Useful when buffers can not be
1872  * processed but must be returned to the guest.
1873  */
1874 unsigned int virtqueue_drop_all(VirtQueue *vq)
1875 {
1876     struct VirtIODevice *vdev = vq->vdev;
1877 
1878     if (virtio_device_disabled(vq->vdev)) {
1879         return 0;
1880     }
1881 
1882     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1883         return virtqueue_packed_drop_all(vq);
1884     } else {
1885         return virtqueue_split_drop_all(vq);
1886     }
1887 }
1888 
1889 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1890  * it is what QEMU has always done by mistake.  We can change it sooner
1891  * or later by bumping the version number of the affected vm states.
1892  * In the meanwhile, since the in-memory layout of VirtQueueElement
1893  * has changed, we need to marshal to and from the layout that was
1894  * used before the change.
1895  */
1896 typedef struct VirtQueueElementOld {
1897     unsigned int index;
1898     unsigned int out_num;
1899     unsigned int in_num;
1900     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1901     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1902     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1903     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1904 } VirtQueueElementOld;
1905 
1906 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1907 {
1908     VirtQueueElement *elem;
1909     VirtQueueElementOld data;
1910     int i;
1911 
1912     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1913 
1914     /* TODO: teach all callers that this can fail, and return failure instead
1915      * of asserting here.
1916      * This is just one thing (there are probably more) that must be
1917      * fixed before we can allow NDEBUG compilation.
1918      */
1919     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1920     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1921 
1922     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1923     elem->index = data.index;
1924 
1925     for (i = 0; i < elem->in_num; i++) {
1926         elem->in_addr[i] = data.in_addr[i];
1927     }
1928 
1929     for (i = 0; i < elem->out_num; i++) {
1930         elem->out_addr[i] = data.out_addr[i];
1931     }
1932 
1933     for (i = 0; i < elem->in_num; i++) {
1934         /* Base is overwritten by virtqueue_map.  */
1935         elem->in_sg[i].iov_base = 0;
1936         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1937     }
1938 
1939     for (i = 0; i < elem->out_num; i++) {
1940         /* Base is overwritten by virtqueue_map.  */
1941         elem->out_sg[i].iov_base = 0;
1942         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1943     }
1944 
1945     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1946         qemu_get_be32s(f, &elem->ndescs);
1947     }
1948 
1949     virtqueue_map(vdev, elem);
1950     return elem;
1951 }
1952 
1953 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1954                                 VirtQueueElement *elem)
1955 {
1956     VirtQueueElementOld data;
1957     int i;
1958 
1959     memset(&data, 0, sizeof(data));
1960     data.index = elem->index;
1961     data.in_num = elem->in_num;
1962     data.out_num = elem->out_num;
1963 
1964     for (i = 0; i < elem->in_num; i++) {
1965         data.in_addr[i] = elem->in_addr[i];
1966     }
1967 
1968     for (i = 0; i < elem->out_num; i++) {
1969         data.out_addr[i] = elem->out_addr[i];
1970     }
1971 
1972     for (i = 0; i < elem->in_num; i++) {
1973         /* Base is overwritten by virtqueue_map when loading.  Do not
1974          * save it, as it would leak the QEMU address space layout.  */
1975         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1976     }
1977 
1978     for (i = 0; i < elem->out_num; i++) {
1979         /* Do not save iov_base as above.  */
1980         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1981     }
1982 
1983     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1984         qemu_put_be32s(f, &elem->ndescs);
1985     }
1986 
1987     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1988 }
1989 
1990 /* virtio device */
1991 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1992 {
1993     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1994     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1995 
1996     if (virtio_device_disabled(vdev)) {
1997         return;
1998     }
1999 
2000     if (k->notify) {
2001         k->notify(qbus->parent, vector);
2002     }
2003 }
2004 
2005 void virtio_update_irq(VirtIODevice *vdev)
2006 {
2007     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2008 }
2009 
2010 static int virtio_validate_features(VirtIODevice *vdev)
2011 {
2012     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2013 
2014     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2015         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2016         return -EFAULT;
2017     }
2018 
2019     if (k->validate_features) {
2020         return k->validate_features(vdev);
2021     } else {
2022         return 0;
2023     }
2024 }
2025 
2026 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2027 {
2028     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2029     trace_virtio_set_status(vdev, val);
2030 
2031     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2032         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2033             val & VIRTIO_CONFIG_S_FEATURES_OK) {
2034             int ret = virtio_validate_features(vdev);
2035 
2036             if (ret) {
2037                 return ret;
2038             }
2039         }
2040     }
2041 
2042     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2043         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2044         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2045     }
2046 
2047     if (k->set_status) {
2048         k->set_status(vdev, val);
2049     }
2050     vdev->status = val;
2051 
2052     return 0;
2053 }
2054 
2055 static enum virtio_device_endian virtio_default_endian(void)
2056 {
2057     if (target_words_bigendian()) {
2058         return VIRTIO_DEVICE_ENDIAN_BIG;
2059     } else {
2060         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2061     }
2062 }
2063 
2064 static enum virtio_device_endian virtio_current_cpu_endian(void)
2065 {
2066     if (cpu_virtio_is_big_endian(current_cpu)) {
2067         return VIRTIO_DEVICE_ENDIAN_BIG;
2068     } else {
2069         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2070     }
2071 }
2072 
2073 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2074 {
2075     vdev->vq[i].vring.desc = 0;
2076     vdev->vq[i].vring.avail = 0;
2077     vdev->vq[i].vring.used = 0;
2078     vdev->vq[i].last_avail_idx = 0;
2079     vdev->vq[i].shadow_avail_idx = 0;
2080     vdev->vq[i].used_idx = 0;
2081     vdev->vq[i].last_avail_wrap_counter = true;
2082     vdev->vq[i].shadow_avail_wrap_counter = true;
2083     vdev->vq[i].used_wrap_counter = true;
2084     virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2085     vdev->vq[i].signalled_used = 0;
2086     vdev->vq[i].signalled_used_valid = false;
2087     vdev->vq[i].notification = true;
2088     vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2089     vdev->vq[i].inuse = 0;
2090     virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2091 }
2092 
2093 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2094 {
2095     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2096 
2097     if (k->queue_reset) {
2098         k->queue_reset(vdev, queue_index);
2099     }
2100 
2101     __virtio_queue_reset(vdev, queue_index);
2102 }
2103 
2104 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2105 {
2106     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2107 
2108     /*
2109      * TODO: Seabios is currently out of spec and triggering this error.
2110      * So this needs to be fixed in Seabios, then this can
2111      * be re-enabled for new machine types only, and also after
2112      * being converted to LOG_GUEST_ERROR.
2113      *
2114     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2115         error_report("queue_enable is only supported in devices of virtio "
2116                      "1.0 or later.");
2117     }
2118     */
2119 
2120     if (k->queue_enable) {
2121         k->queue_enable(vdev, queue_index);
2122     }
2123 }
2124 
2125 void virtio_reset(void *opaque)
2126 {
2127     VirtIODevice *vdev = opaque;
2128     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2129     int i;
2130 
2131     virtio_set_status(vdev, 0);
2132     if (current_cpu) {
2133         /* Guest initiated reset */
2134         vdev->device_endian = virtio_current_cpu_endian();
2135     } else {
2136         /* System reset */
2137         vdev->device_endian = virtio_default_endian();
2138     }
2139 
2140     if (vdev->vhost_started && k->get_vhost) {
2141         vhost_reset_device(k->get_vhost(vdev));
2142     }
2143 
2144     if (k->reset) {
2145         k->reset(vdev);
2146     }
2147 
2148     vdev->start_on_kick = false;
2149     vdev->started = false;
2150     vdev->broken = false;
2151     vdev->guest_features = 0;
2152     vdev->queue_sel = 0;
2153     vdev->status = 0;
2154     vdev->disabled = false;
2155     qatomic_set(&vdev->isr, 0);
2156     vdev->config_vector = VIRTIO_NO_VECTOR;
2157     virtio_notify_vector(vdev, vdev->config_vector);
2158 
2159     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2160         __virtio_queue_reset(vdev, i);
2161     }
2162 }
2163 
2164 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2165 {
2166     if (!vdev->vq[n].vring.num) {
2167         return;
2168     }
2169     vdev->vq[n].vring.desc = addr;
2170     virtio_queue_update_rings(vdev, n);
2171 }
2172 
2173 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2174 {
2175     return vdev->vq[n].vring.desc;
2176 }
2177 
2178 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2179                             hwaddr avail, hwaddr used)
2180 {
2181     if (!vdev->vq[n].vring.num) {
2182         return;
2183     }
2184     vdev->vq[n].vring.desc = desc;
2185     vdev->vq[n].vring.avail = avail;
2186     vdev->vq[n].vring.used = used;
2187     virtio_init_region_cache(vdev, n);
2188 }
2189 
2190 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2191 {
2192     /* Don't allow guest to flip queue between existent and
2193      * nonexistent states, or to set it to an invalid size.
2194      */
2195     if (!!num != !!vdev->vq[n].vring.num ||
2196         num > VIRTQUEUE_MAX_SIZE ||
2197         num < 0) {
2198         return;
2199     }
2200     vdev->vq[n].vring.num = num;
2201 }
2202 
2203 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2204 {
2205     return QLIST_FIRST(&vdev->vector_queues[vector]);
2206 }
2207 
2208 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2209 {
2210     return QLIST_NEXT(vq, node);
2211 }
2212 
2213 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2214 {
2215     return vdev->vq[n].vring.num;
2216 }
2217 
2218 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2219 {
2220     return vdev->vq[n].vring.num_default;
2221 }
2222 
2223 int virtio_get_num_queues(VirtIODevice *vdev)
2224 {
2225     int i;
2226 
2227     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2228         if (!virtio_queue_get_num(vdev, i)) {
2229             break;
2230         }
2231     }
2232 
2233     return i;
2234 }
2235 
2236 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2237 {
2238     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2239     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2240 
2241     /* virtio-1 compliant devices cannot change the alignment */
2242     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2243         error_report("tried to modify queue alignment for virtio-1 device");
2244         return;
2245     }
2246     /* Check that the transport told us it was going to do this
2247      * (so a buggy transport will immediately assert rather than
2248      * silently failing to migrate this state)
2249      */
2250     assert(k->has_variable_vring_alignment);
2251 
2252     if (align) {
2253         vdev->vq[n].vring.align = align;
2254         virtio_queue_update_rings(vdev, n);
2255     }
2256 }
2257 
2258 static void virtio_queue_notify_vq(VirtQueue *vq)
2259 {
2260     if (vq->vring.desc && vq->handle_output) {
2261         VirtIODevice *vdev = vq->vdev;
2262 
2263         if (unlikely(vdev->broken)) {
2264             return;
2265         }
2266 
2267         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2268         vq->handle_output(vdev, vq);
2269 
2270         if (unlikely(vdev->start_on_kick)) {
2271             virtio_set_started(vdev, true);
2272         }
2273     }
2274 }
2275 
2276 void virtio_queue_notify(VirtIODevice *vdev, int n)
2277 {
2278     VirtQueue *vq = &vdev->vq[n];
2279 
2280     if (unlikely(!vq->vring.desc || vdev->broken)) {
2281         return;
2282     }
2283 
2284     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2285     if (vq->host_notifier_enabled) {
2286         event_notifier_set(&vq->host_notifier);
2287     } else if (vq->handle_output) {
2288         vq->handle_output(vdev, vq);
2289 
2290         if (unlikely(vdev->start_on_kick)) {
2291             virtio_set_started(vdev, true);
2292         }
2293     }
2294 }
2295 
2296 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2297 {
2298     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2299         VIRTIO_NO_VECTOR;
2300 }
2301 
2302 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2303 {
2304     VirtQueue *vq = &vdev->vq[n];
2305 
2306     if (n < VIRTIO_QUEUE_MAX) {
2307         if (vdev->vector_queues &&
2308             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2309             QLIST_REMOVE(vq, node);
2310         }
2311         vdev->vq[n].vector = vector;
2312         if (vdev->vector_queues &&
2313             vector != VIRTIO_NO_VECTOR) {
2314             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2315         }
2316     }
2317 }
2318 
2319 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2320                             VirtIOHandleOutput handle_output)
2321 {
2322     int i;
2323 
2324     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2325         if (vdev->vq[i].vring.num == 0)
2326             break;
2327     }
2328 
2329     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2330         abort();
2331 
2332     vdev->vq[i].vring.num = queue_size;
2333     vdev->vq[i].vring.num_default = queue_size;
2334     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2335     vdev->vq[i].handle_output = handle_output;
2336     vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2337 
2338     return &vdev->vq[i];
2339 }
2340 
2341 void virtio_delete_queue(VirtQueue *vq)
2342 {
2343     vq->vring.num = 0;
2344     vq->vring.num_default = 0;
2345     vq->handle_output = NULL;
2346     g_free(vq->used_elems);
2347     vq->used_elems = NULL;
2348     virtio_virtqueue_reset_region_cache(vq);
2349 }
2350 
2351 void virtio_del_queue(VirtIODevice *vdev, int n)
2352 {
2353     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2354         abort();
2355     }
2356 
2357     virtio_delete_queue(&vdev->vq[n]);
2358 }
2359 
2360 static void virtio_set_isr(VirtIODevice *vdev, int value)
2361 {
2362     uint8_t old = qatomic_read(&vdev->isr);
2363 
2364     /* Do not write ISR if it does not change, so that its cacheline remains
2365      * shared in the common case where the guest does not read it.
2366      */
2367     if ((old & value) != value) {
2368         qatomic_or(&vdev->isr, value);
2369     }
2370 }
2371 
2372 /* Called within rcu_read_lock(). */
2373 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2374 {
2375     uint16_t old, new;
2376     bool v;
2377     /* We need to expose used array entries before checking used event. */
2378     smp_mb();
2379     /* Always notify when queue is empty (when feature acknowledge) */
2380     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2381         !vq->inuse && virtio_queue_empty(vq)) {
2382         return true;
2383     }
2384 
2385     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2386         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2387     }
2388 
2389     v = vq->signalled_used_valid;
2390     vq->signalled_used_valid = true;
2391     old = vq->signalled_used;
2392     new = vq->signalled_used = vq->used_idx;
2393     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2394 }
2395 
2396 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2397                                     uint16_t off_wrap, uint16_t new,
2398                                     uint16_t old)
2399 {
2400     int off = off_wrap & ~(1 << 15);
2401 
2402     if (wrap != off_wrap >> 15) {
2403         off -= vq->vring.num;
2404     }
2405 
2406     return vring_need_event(off, new, old);
2407 }
2408 
2409 /* Called within rcu_read_lock(). */
2410 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2411 {
2412     VRingPackedDescEvent e;
2413     uint16_t old, new;
2414     bool v;
2415     VRingMemoryRegionCaches *caches;
2416 
2417     caches = vring_get_region_caches(vq);
2418     if (!caches) {
2419         return false;
2420     }
2421 
2422     vring_packed_event_read(vdev, &caches->avail, &e);
2423 
2424     old = vq->signalled_used;
2425     new = vq->signalled_used = vq->used_idx;
2426     v = vq->signalled_used_valid;
2427     vq->signalled_used_valid = true;
2428 
2429     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2430         return false;
2431     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2432         return true;
2433     }
2434 
2435     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2436                                          e.off_wrap, new, old);
2437 }
2438 
2439 /* Called within rcu_read_lock().  */
2440 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2441 {
2442     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2443         return virtio_packed_should_notify(vdev, vq);
2444     } else {
2445         return virtio_split_should_notify(vdev, vq);
2446     }
2447 }
2448 
2449 /* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
2450 static void virtio_notify_irqfd_deferred_fn(void *opaque)
2451 {
2452     EventNotifier *notifier = opaque;
2453     VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
2454 
2455     trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
2456     event_notifier_set(notifier);
2457 }
2458 
2459 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2460 {
2461     WITH_RCU_READ_LOCK_GUARD() {
2462         if (!virtio_should_notify(vdev, vq)) {
2463             return;
2464         }
2465     }
2466 
2467     trace_virtio_notify_irqfd(vdev, vq);
2468 
2469     /*
2470      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2471      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2472      * incorrectly polling this bit during crashdump and hibernation
2473      * in MSI mode, causing a hang if this bit is never updated.
2474      * Recent releases of Windows do not really shut down, but rather
2475      * log out and hibernate to make the next startup faster.  Hence,
2476      * this manifested as a more serious hang during shutdown with
2477      *
2478      * Next driver release from 2016 fixed this problem, so working around it
2479      * is not a must, but it's easy to do so let's do it here.
2480      *
2481      * Note: it's safe to update ISR from any thread as it was switched
2482      * to an atomic operation.
2483      */
2484     virtio_set_isr(vq->vdev, 0x1);
2485     defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
2486 }
2487 
2488 static void virtio_irq(VirtQueue *vq)
2489 {
2490     virtio_set_isr(vq->vdev, 0x1);
2491     virtio_notify_vector(vq->vdev, vq->vector);
2492 }
2493 
2494 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2495 {
2496     WITH_RCU_READ_LOCK_GUARD() {
2497         if (!virtio_should_notify(vdev, vq)) {
2498             return;
2499         }
2500     }
2501 
2502     trace_virtio_notify(vdev, vq);
2503     virtio_irq(vq);
2504 }
2505 
2506 void virtio_notify_config(VirtIODevice *vdev)
2507 {
2508     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2509         return;
2510 
2511     virtio_set_isr(vdev, 0x3);
2512     vdev->generation++;
2513     virtio_notify_vector(vdev, vdev->config_vector);
2514 }
2515 
2516 static bool virtio_device_endian_needed(void *opaque)
2517 {
2518     VirtIODevice *vdev = opaque;
2519 
2520     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2521     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2522         return vdev->device_endian != virtio_default_endian();
2523     }
2524     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2525     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2526 }
2527 
2528 static bool virtio_64bit_features_needed(void *opaque)
2529 {
2530     VirtIODevice *vdev = opaque;
2531 
2532     return (vdev->host_features >> 32) != 0;
2533 }
2534 
2535 static bool virtio_virtqueue_needed(void *opaque)
2536 {
2537     VirtIODevice *vdev = opaque;
2538 
2539     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2540 }
2541 
2542 static bool virtio_packed_virtqueue_needed(void *opaque)
2543 {
2544     VirtIODevice *vdev = opaque;
2545 
2546     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2547 }
2548 
2549 static bool virtio_ringsize_needed(void *opaque)
2550 {
2551     VirtIODevice *vdev = opaque;
2552     int i;
2553 
2554     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2555         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2556             return true;
2557         }
2558     }
2559     return false;
2560 }
2561 
2562 static bool virtio_extra_state_needed(void *opaque)
2563 {
2564     VirtIODevice *vdev = opaque;
2565     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2566     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2567 
2568     return k->has_extra_state &&
2569         k->has_extra_state(qbus->parent);
2570 }
2571 
2572 static bool virtio_broken_needed(void *opaque)
2573 {
2574     VirtIODevice *vdev = opaque;
2575 
2576     return vdev->broken;
2577 }
2578 
2579 static bool virtio_started_needed(void *opaque)
2580 {
2581     VirtIODevice *vdev = opaque;
2582 
2583     return vdev->started;
2584 }
2585 
2586 static bool virtio_disabled_needed(void *opaque)
2587 {
2588     VirtIODevice *vdev = opaque;
2589 
2590     return vdev->disabled;
2591 }
2592 
2593 static const VMStateDescription vmstate_virtqueue = {
2594     .name = "virtqueue_state",
2595     .version_id = 1,
2596     .minimum_version_id = 1,
2597     .fields = (const VMStateField[]) {
2598         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2599         VMSTATE_UINT64(vring.used, struct VirtQueue),
2600         VMSTATE_END_OF_LIST()
2601     }
2602 };
2603 
2604 static const VMStateDescription vmstate_packed_virtqueue = {
2605     .name = "packed_virtqueue_state",
2606     .version_id = 1,
2607     .minimum_version_id = 1,
2608     .fields = (const VMStateField[]) {
2609         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2610         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2611         VMSTATE_UINT16(used_idx, struct VirtQueue),
2612         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2613         VMSTATE_UINT32(inuse, struct VirtQueue),
2614         VMSTATE_END_OF_LIST()
2615     }
2616 };
2617 
2618 static const VMStateDescription vmstate_virtio_virtqueues = {
2619     .name = "virtio/virtqueues",
2620     .version_id = 1,
2621     .minimum_version_id = 1,
2622     .needed = &virtio_virtqueue_needed,
2623     .fields = (const VMStateField[]) {
2624         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2625                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2626         VMSTATE_END_OF_LIST()
2627     }
2628 };
2629 
2630 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2631     .name = "virtio/packed_virtqueues",
2632     .version_id = 1,
2633     .minimum_version_id = 1,
2634     .needed = &virtio_packed_virtqueue_needed,
2635     .fields = (const VMStateField[]) {
2636         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2637                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2638         VMSTATE_END_OF_LIST()
2639     }
2640 };
2641 
2642 static const VMStateDescription vmstate_ringsize = {
2643     .name = "ringsize_state",
2644     .version_id = 1,
2645     .minimum_version_id = 1,
2646     .fields = (const VMStateField[]) {
2647         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2648         VMSTATE_END_OF_LIST()
2649     }
2650 };
2651 
2652 static const VMStateDescription vmstate_virtio_ringsize = {
2653     .name = "virtio/ringsize",
2654     .version_id = 1,
2655     .minimum_version_id = 1,
2656     .needed = &virtio_ringsize_needed,
2657     .fields = (const VMStateField[]) {
2658         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2659                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2660         VMSTATE_END_OF_LIST()
2661     }
2662 };
2663 
2664 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2665                            const VMStateField *field)
2666 {
2667     VirtIODevice *vdev = pv;
2668     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2669     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2670 
2671     if (!k->load_extra_state) {
2672         return -1;
2673     } else {
2674         return k->load_extra_state(qbus->parent, f);
2675     }
2676 }
2677 
2678 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2679                            const VMStateField *field, JSONWriter *vmdesc)
2680 {
2681     VirtIODevice *vdev = pv;
2682     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2683     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2684 
2685     k->save_extra_state(qbus->parent, f);
2686     return 0;
2687 }
2688 
2689 static const VMStateInfo vmstate_info_extra_state = {
2690     .name = "virtqueue_extra_state",
2691     .get = get_extra_state,
2692     .put = put_extra_state,
2693 };
2694 
2695 static const VMStateDescription vmstate_virtio_extra_state = {
2696     .name = "virtio/extra_state",
2697     .version_id = 1,
2698     .minimum_version_id = 1,
2699     .needed = &virtio_extra_state_needed,
2700     .fields = (const VMStateField[]) {
2701         {
2702             .name         = "extra_state",
2703             .version_id   = 0,
2704             .field_exists = NULL,
2705             .size         = 0,
2706             .info         = &vmstate_info_extra_state,
2707             .flags        = VMS_SINGLE,
2708             .offset       = 0,
2709         },
2710         VMSTATE_END_OF_LIST()
2711     }
2712 };
2713 
2714 static const VMStateDescription vmstate_virtio_device_endian = {
2715     .name = "virtio/device_endian",
2716     .version_id = 1,
2717     .minimum_version_id = 1,
2718     .needed = &virtio_device_endian_needed,
2719     .fields = (const VMStateField[]) {
2720         VMSTATE_UINT8(device_endian, VirtIODevice),
2721         VMSTATE_END_OF_LIST()
2722     }
2723 };
2724 
2725 static const VMStateDescription vmstate_virtio_64bit_features = {
2726     .name = "virtio/64bit_features",
2727     .version_id = 1,
2728     .minimum_version_id = 1,
2729     .needed = &virtio_64bit_features_needed,
2730     .fields = (const VMStateField[]) {
2731         VMSTATE_UINT64(guest_features, VirtIODevice),
2732         VMSTATE_END_OF_LIST()
2733     }
2734 };
2735 
2736 static const VMStateDescription vmstate_virtio_broken = {
2737     .name = "virtio/broken",
2738     .version_id = 1,
2739     .minimum_version_id = 1,
2740     .needed = &virtio_broken_needed,
2741     .fields = (const VMStateField[]) {
2742         VMSTATE_BOOL(broken, VirtIODevice),
2743         VMSTATE_END_OF_LIST()
2744     }
2745 };
2746 
2747 static const VMStateDescription vmstate_virtio_started = {
2748     .name = "virtio/started",
2749     .version_id = 1,
2750     .minimum_version_id = 1,
2751     .needed = &virtio_started_needed,
2752     .fields = (const VMStateField[]) {
2753         VMSTATE_BOOL(started, VirtIODevice),
2754         VMSTATE_END_OF_LIST()
2755     }
2756 };
2757 
2758 static const VMStateDescription vmstate_virtio_disabled = {
2759     .name = "virtio/disabled",
2760     .version_id = 1,
2761     .minimum_version_id = 1,
2762     .needed = &virtio_disabled_needed,
2763     .fields = (const VMStateField[]) {
2764         VMSTATE_BOOL(disabled, VirtIODevice),
2765         VMSTATE_END_OF_LIST()
2766     }
2767 };
2768 
2769 static const VMStateDescription vmstate_virtio = {
2770     .name = "virtio",
2771     .version_id = 1,
2772     .minimum_version_id = 1,
2773     .fields = (const VMStateField[]) {
2774         VMSTATE_END_OF_LIST()
2775     },
2776     .subsections = (const VMStateDescription * const []) {
2777         &vmstate_virtio_device_endian,
2778         &vmstate_virtio_64bit_features,
2779         &vmstate_virtio_virtqueues,
2780         &vmstate_virtio_ringsize,
2781         &vmstate_virtio_broken,
2782         &vmstate_virtio_extra_state,
2783         &vmstate_virtio_started,
2784         &vmstate_virtio_packed_virtqueues,
2785         &vmstate_virtio_disabled,
2786         NULL
2787     }
2788 };
2789 
2790 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2791 {
2792     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2793     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2794     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2795     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2796     int i;
2797 
2798     if (k->save_config) {
2799         k->save_config(qbus->parent, f);
2800     }
2801 
2802     qemu_put_8s(f, &vdev->status);
2803     qemu_put_8s(f, &vdev->isr);
2804     qemu_put_be16s(f, &vdev->queue_sel);
2805     qemu_put_be32s(f, &guest_features_lo);
2806     qemu_put_be32(f, vdev->config_len);
2807     qemu_put_buffer(f, vdev->config, vdev->config_len);
2808 
2809     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2810         if (vdev->vq[i].vring.num == 0)
2811             break;
2812     }
2813 
2814     qemu_put_be32(f, i);
2815 
2816     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2817         if (vdev->vq[i].vring.num == 0)
2818             break;
2819 
2820         qemu_put_be32(f, vdev->vq[i].vring.num);
2821         if (k->has_variable_vring_alignment) {
2822             qemu_put_be32(f, vdev->vq[i].vring.align);
2823         }
2824         /*
2825          * Save desc now, the rest of the ring addresses are saved in
2826          * subsections for VIRTIO-1 devices.
2827          */
2828         qemu_put_be64(f, vdev->vq[i].vring.desc);
2829         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2830         if (k->save_queue) {
2831             k->save_queue(qbus->parent, i, f);
2832         }
2833     }
2834 
2835     if (vdc->save != NULL) {
2836         vdc->save(vdev, f);
2837     }
2838 
2839     if (vdc->vmsd) {
2840         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2841         if (ret) {
2842             return ret;
2843         }
2844     }
2845 
2846     /* Subsections */
2847     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2848 }
2849 
2850 /* A wrapper for use as a VMState .put function */
2851 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2852                               const VMStateField *field, JSONWriter *vmdesc)
2853 {
2854     return virtio_save(VIRTIO_DEVICE(opaque), f);
2855 }
2856 
2857 /* A wrapper for use as a VMState .get function */
2858 static int coroutine_mixed_fn
2859 virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2860                   const VMStateField *field)
2861 {
2862     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2863     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2864 
2865     return virtio_load(vdev, f, dc->vmsd->version_id);
2866 }
2867 
2868 const VMStateInfo  virtio_vmstate_info = {
2869     .name = "virtio",
2870     .get = virtio_device_get,
2871     .put = virtio_device_put,
2872 };
2873 
2874 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2875 {
2876     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2877     bool bad = (val & ~(vdev->host_features)) != 0;
2878 
2879     val &= vdev->host_features;
2880     if (k->set_features) {
2881         k->set_features(vdev, val);
2882     }
2883     vdev->guest_features = val;
2884     return bad ? -1 : 0;
2885 }
2886 
2887 typedef struct VirtioSetFeaturesNocheckData {
2888     Coroutine *co;
2889     VirtIODevice *vdev;
2890     uint64_t val;
2891     int ret;
2892 } VirtioSetFeaturesNocheckData;
2893 
2894 static void virtio_set_features_nocheck_bh(void *opaque)
2895 {
2896     VirtioSetFeaturesNocheckData *data = opaque;
2897 
2898     data->ret = virtio_set_features_nocheck(data->vdev, data->val);
2899     aio_co_wake(data->co);
2900 }
2901 
2902 static int coroutine_mixed_fn
2903 virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
2904 {
2905     if (qemu_in_coroutine()) {
2906         VirtioSetFeaturesNocheckData data = {
2907             .co = qemu_coroutine_self(),
2908             .vdev = vdev,
2909             .val = val,
2910         };
2911         aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
2912                                 virtio_set_features_nocheck_bh, &data);
2913         qemu_coroutine_yield();
2914         return data.ret;
2915     } else {
2916         return virtio_set_features_nocheck(vdev, val);
2917     }
2918 }
2919 
2920 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2921 {
2922     int ret;
2923     /*
2924      * The driver must not attempt to set features after feature negotiation
2925      * has finished.
2926      */
2927     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2928         return -EINVAL;
2929     }
2930 
2931     if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2932         qemu_log_mask(LOG_GUEST_ERROR,
2933                       "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2934                       __func__, vdev->name);
2935     }
2936 
2937     ret = virtio_set_features_nocheck(vdev, val);
2938     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2939         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2940         int i;
2941         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2942             if (vdev->vq[i].vring.num != 0) {
2943                 virtio_init_region_cache(vdev, i);
2944             }
2945         }
2946     }
2947     if (!ret) {
2948         if (!virtio_device_started(vdev, vdev->status) &&
2949             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2950             vdev->start_on_kick = true;
2951         }
2952     }
2953     return ret;
2954 }
2955 
2956 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2957                               uint64_t host_features)
2958 {
2959     size_t config_size = params->min_size;
2960     const VirtIOFeature *feature_sizes = params->feature_sizes;
2961     size_t i;
2962 
2963     for (i = 0; feature_sizes[i].flags != 0; i++) {
2964         if (host_features & feature_sizes[i].flags) {
2965             config_size = MAX(feature_sizes[i].end, config_size);
2966         }
2967     }
2968 
2969     assert(config_size <= params->max_size);
2970     return config_size;
2971 }
2972 
2973 int coroutine_mixed_fn
2974 virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2975 {
2976     int i, ret;
2977     int32_t config_len;
2978     uint32_t num;
2979     uint32_t features;
2980     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2981     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2982     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2983 
2984     /*
2985      * We poison the endianness to ensure it does not get used before
2986      * subsections have been loaded.
2987      */
2988     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2989 
2990     if (k->load_config) {
2991         ret = k->load_config(qbus->parent, f);
2992         if (ret)
2993             return ret;
2994     }
2995 
2996     qemu_get_8s(f, &vdev->status);
2997     qemu_get_8s(f, &vdev->isr);
2998     qemu_get_be16s(f, &vdev->queue_sel);
2999     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3000         return -1;
3001     }
3002     qemu_get_be32s(f, &features);
3003 
3004     /*
3005      * Temporarily set guest_features low bits - needed by
3006      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3007      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3008      *
3009      * Note: devices should always test host features in future - don't create
3010      * new dependencies like this.
3011      */
3012     vdev->guest_features = features;
3013 
3014     config_len = qemu_get_be32(f);
3015 
3016     /*
3017      * There are cases where the incoming config can be bigger or smaller
3018      * than what we have; so load what we have space for, and skip
3019      * any excess that's in the stream.
3020      */
3021     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3022 
3023     while (config_len > vdev->config_len) {
3024         qemu_get_byte(f);
3025         config_len--;
3026     }
3027 
3028     num = qemu_get_be32(f);
3029 
3030     if (num > VIRTIO_QUEUE_MAX) {
3031         error_report("Invalid number of virtqueues: 0x%x", num);
3032         return -1;
3033     }
3034 
3035     for (i = 0; i < num; i++) {
3036         vdev->vq[i].vring.num = qemu_get_be32(f);
3037         if (k->has_variable_vring_alignment) {
3038             vdev->vq[i].vring.align = qemu_get_be32(f);
3039         }
3040         vdev->vq[i].vring.desc = qemu_get_be64(f);
3041         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3042         vdev->vq[i].signalled_used_valid = false;
3043         vdev->vq[i].notification = true;
3044 
3045         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3046             error_report("VQ %d address 0x0 "
3047                          "inconsistent with Host index 0x%x",
3048                          i, vdev->vq[i].last_avail_idx);
3049             return -1;
3050         }
3051         if (k->load_queue) {
3052             ret = k->load_queue(qbus->parent, i, f);
3053             if (ret)
3054                 return ret;
3055         }
3056     }
3057 
3058     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3059 
3060     if (vdc->load != NULL) {
3061         ret = vdc->load(vdev, f, version_id);
3062         if (ret) {
3063             return ret;
3064         }
3065     }
3066 
3067     if (vdc->vmsd) {
3068         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3069         if (ret) {
3070             return ret;
3071         }
3072     }
3073 
3074     /* Subsections */
3075     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3076     if (ret) {
3077         return ret;
3078     }
3079 
3080     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3081         vdev->device_endian = virtio_default_endian();
3082     }
3083 
3084     if (virtio_64bit_features_needed(vdev)) {
3085         /*
3086          * Subsection load filled vdev->guest_features.  Run them
3087          * through virtio_set_features to sanity-check them against
3088          * host_features.
3089          */
3090         uint64_t features64 = vdev->guest_features;
3091         if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
3092             error_report("Features 0x%" PRIx64 " unsupported. "
3093                          "Allowed features: 0x%" PRIx64,
3094                          features64, vdev->host_features);
3095             return -1;
3096         }
3097     } else {
3098         if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
3099             error_report("Features 0x%x unsupported. "
3100                          "Allowed features: 0x%" PRIx64,
3101                          features, vdev->host_features);
3102             return -1;
3103         }
3104     }
3105 
3106     if (!virtio_device_started(vdev, vdev->status) &&
3107         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3108         vdev->start_on_kick = true;
3109     }
3110 
3111     RCU_READ_LOCK_GUARD();
3112     for (i = 0; i < num; i++) {
3113         if (vdev->vq[i].vring.desc) {
3114             uint16_t nheads;
3115 
3116             /*
3117              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3118              * only the region cache needs to be set up.  Legacy devices need
3119              * to calculate used and avail ring addresses based on the desc
3120              * address.
3121              */
3122             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3123                 virtio_init_region_cache(vdev, i);
3124             } else {
3125                 virtio_queue_update_rings(vdev, i);
3126             }
3127 
3128             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3129                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3130                 vdev->vq[i].shadow_avail_wrap_counter =
3131                                         vdev->vq[i].last_avail_wrap_counter;
3132                 continue;
3133             }
3134 
3135             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3136             /* Check it isn't doing strange things with descriptor numbers. */
3137             if (nheads > vdev->vq[i].vring.num) {
3138                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3139                              "inconsistent with Host index 0x%x: delta 0x%x",
3140                              i, vdev->vq[i].vring.num,
3141                              vring_avail_idx(&vdev->vq[i]),
3142                              vdev->vq[i].last_avail_idx, nheads);
3143                 vdev->vq[i].used_idx = 0;
3144                 vdev->vq[i].shadow_avail_idx = 0;
3145                 vdev->vq[i].inuse = 0;
3146                 continue;
3147             }
3148             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3149             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3150 
3151             /*
3152              * Some devices migrate VirtQueueElements that have been popped
3153              * from the avail ring but not yet returned to the used ring.
3154              * Since max ring size < UINT16_MAX it's safe to use modulo
3155              * UINT16_MAX + 1 subtraction.
3156              */
3157             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3158                                 vdev->vq[i].used_idx);
3159             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3160                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3161                              "used_idx 0x%x",
3162                              i, vdev->vq[i].vring.num,
3163                              vdev->vq[i].last_avail_idx,
3164                              vdev->vq[i].used_idx);
3165                 return -1;
3166             }
3167         }
3168     }
3169 
3170     if (vdc->post_load) {
3171         ret = vdc->post_load(vdev);
3172         if (ret) {
3173             return ret;
3174         }
3175     }
3176 
3177     return 0;
3178 }
3179 
3180 void virtio_cleanup(VirtIODevice *vdev)
3181 {
3182     qemu_del_vm_change_state_handler(vdev->vmstate);
3183 }
3184 
3185 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3186 {
3187     VirtIODevice *vdev = opaque;
3188     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3189     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3190     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3191     vdev->vm_running = running;
3192 
3193     if (backend_run) {
3194         virtio_set_status(vdev, vdev->status);
3195     }
3196 
3197     if (k->vmstate_change) {
3198         k->vmstate_change(qbus->parent, backend_run);
3199     }
3200 
3201     if (!backend_run) {
3202         virtio_set_status(vdev, vdev->status);
3203     }
3204 }
3205 
3206 void virtio_instance_init_common(Object *proxy_obj, void *data,
3207                                  size_t vdev_size, const char *vdev_name)
3208 {
3209     DeviceState *vdev = data;
3210 
3211     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3212                                        vdev_size, vdev_name, &error_abort,
3213                                        NULL);
3214     qdev_alias_all_properties(vdev, proxy_obj);
3215 }
3216 
3217 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3218 {
3219     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3220     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3221     int i;
3222     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3223 
3224     if (nvectors) {
3225         vdev->vector_queues =
3226             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3227     }
3228 
3229     vdev->start_on_kick = false;
3230     vdev->started = false;
3231     vdev->vhost_started = false;
3232     vdev->device_id = device_id;
3233     vdev->status = 0;
3234     qatomic_set(&vdev->isr, 0);
3235     vdev->queue_sel = 0;
3236     vdev->config_vector = VIRTIO_NO_VECTOR;
3237     vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3238     vdev->vm_running = runstate_is_running();
3239     vdev->broken = false;
3240     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3241         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3242         vdev->vq[i].vdev = vdev;
3243         vdev->vq[i].queue_index = i;
3244         vdev->vq[i].host_notifier_enabled = false;
3245     }
3246 
3247     vdev->name = virtio_id_to_name(device_id);
3248     vdev->config_len = config_size;
3249     if (vdev->config_len) {
3250         vdev->config = g_malloc0(config_size);
3251     } else {
3252         vdev->config = NULL;
3253     }
3254     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3255             virtio_vmstate_change, vdev);
3256     vdev->device_endian = virtio_default_endian();
3257     vdev->use_guest_notifier_mask = true;
3258 }
3259 
3260 /*
3261  * Only devices that have already been around prior to defining the virtio
3262  * standard support legacy mode; this includes devices not specified in the
3263  * standard. All newer devices conform to the virtio standard only.
3264  */
3265 bool virtio_legacy_allowed(VirtIODevice *vdev)
3266 {
3267     switch (vdev->device_id) {
3268     case VIRTIO_ID_NET:
3269     case VIRTIO_ID_BLOCK:
3270     case VIRTIO_ID_CONSOLE:
3271     case VIRTIO_ID_RNG:
3272     case VIRTIO_ID_BALLOON:
3273     case VIRTIO_ID_RPMSG:
3274     case VIRTIO_ID_SCSI:
3275     case VIRTIO_ID_9P:
3276     case VIRTIO_ID_RPROC_SERIAL:
3277     case VIRTIO_ID_CAIF:
3278         return true;
3279     default:
3280         return false;
3281     }
3282 }
3283 
3284 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3285 {
3286     return vdev->disable_legacy_check;
3287 }
3288 
3289 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3290 {
3291     return vdev->vq[n].vring.desc;
3292 }
3293 
3294 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3295 {
3296     return virtio_queue_get_desc_addr(vdev, n) != 0;
3297 }
3298 
3299 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3300 {
3301     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3302     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3303 
3304     if (k->queue_enabled) {
3305         return k->queue_enabled(qbus->parent, n);
3306     }
3307     return virtio_queue_enabled_legacy(vdev, n);
3308 }
3309 
3310 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3311 {
3312     return vdev->vq[n].vring.avail;
3313 }
3314 
3315 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3316 {
3317     return vdev->vq[n].vring.used;
3318 }
3319 
3320 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3321 {
3322     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3323 }
3324 
3325 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3326 {
3327     int s;
3328 
3329     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3330         return sizeof(struct VRingPackedDescEvent);
3331     }
3332 
3333     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3334     return offsetof(VRingAvail, ring) +
3335         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3336 }
3337 
3338 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3339 {
3340     int s;
3341 
3342     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3343         return sizeof(struct VRingPackedDescEvent);
3344     }
3345 
3346     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3347     return offsetof(VRingUsed, ring) +
3348         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3349 }
3350 
3351 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3352                                                            int n)
3353 {
3354     unsigned int avail, used;
3355 
3356     avail = vdev->vq[n].last_avail_idx;
3357     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3358 
3359     used = vdev->vq[n].used_idx;
3360     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3361 
3362     return avail | used << 16;
3363 }
3364 
3365 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3366                                                       int n)
3367 {
3368     return vdev->vq[n].last_avail_idx;
3369 }
3370 
3371 static uint32_t virtio_queue_split_get_vring_states(VirtIODevice *vdev,
3372                                                       int n)
3373 {
3374     struct VirtQueue *vq = &vdev->vq[n];
3375     uint16_t avail, used;
3376 
3377     avail = vq->last_avail_idx;
3378     used = vq->used_idx;
3379 
3380     return avail | (uint32_t)used << 16;
3381 }
3382 
3383 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3384 {
3385     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3386         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3387     } else {
3388         return virtio_queue_split_get_last_avail_idx(vdev, n);
3389     }
3390 }
3391 
3392 unsigned int virtio_queue_get_vring_states(VirtIODevice *vdev, int n)
3393 {
3394     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3395         return -1;
3396     } else {
3397         return virtio_queue_split_get_vring_states(vdev, n);
3398     }
3399 }
3400 
3401 static void virtio_queue_split_set_vring_states(VirtIODevice *vdev,
3402                                                 int n, uint32_t idx)
3403 {
3404     struct VirtQueue *vq = &vdev->vq[n];
3405     vq->last_avail_idx = (uint16_t)(idx & 0xffff);
3406     vq->shadow_avail_idx = (uint16_t)(idx & 0xffff);
3407     vq->used_idx = (uint16_t)(idx >> 16);
3408 }
3409 
3410 void virtio_queue_set_vring_states(VirtIODevice *vdev, int n, uint32_t idx)
3411 {
3412     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3413         return;
3414     } else {
3415         virtio_queue_split_set_vring_states(vdev, n, idx);
3416     }
3417 }
3418 
3419 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3420                                                    int n, unsigned int idx)
3421 {
3422     struct VirtQueue *vq = &vdev->vq[n];
3423 
3424     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3425     vq->last_avail_wrap_counter =
3426         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3427     idx >>= 16;
3428     vq->used_idx = idx & 0x7fff;
3429     vq->used_wrap_counter = !!(idx & 0x8000);
3430 }
3431 
3432 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3433                                                   int n, unsigned int idx)
3434 {
3435         vdev->vq[n].last_avail_idx = idx;
3436         vdev->vq[n].shadow_avail_idx = idx;
3437 }
3438 
3439 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3440                                      unsigned int idx)
3441 {
3442     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3443         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3444     } else {
3445         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3446     }
3447 }
3448 
3449 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3450                                                        int n)
3451 {
3452     /* We don't have a reference like avail idx in shared memory */
3453     return;
3454 }
3455 
3456 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3457                                                       int n)
3458 {
3459     RCU_READ_LOCK_GUARD();
3460     if (vdev->vq[n].vring.desc) {
3461         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3462         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3463     }
3464 }
3465 
3466 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3467 {
3468     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3469         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3470     } else {
3471         virtio_queue_split_restore_last_avail_idx(vdev, n);
3472     }
3473 }
3474 
3475 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3476 {
3477     /* used idx was updated through set_last_avail_idx() */
3478     return;
3479 }
3480 
3481 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3482 {
3483     RCU_READ_LOCK_GUARD();
3484     if (vdev->vq[n].vring.desc) {
3485         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3486     }
3487 }
3488 
3489 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3490 {
3491     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3492         return virtio_queue_packed_update_used_idx(vdev, n);
3493     } else {
3494         return virtio_split_packed_update_used_idx(vdev, n);
3495     }
3496 }
3497 
3498 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3499 {
3500     vdev->vq[n].signalled_used_valid = false;
3501 }
3502 
3503 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3504 {
3505     return vdev->vq + n;
3506 }
3507 
3508 uint16_t virtio_get_queue_index(VirtQueue *vq)
3509 {
3510     return vq->queue_index;
3511 }
3512 
3513 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3514 {
3515     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3516     if (event_notifier_test_and_clear(n)) {
3517         virtio_irq(vq);
3518     }
3519 }
3520 static void virtio_config_guest_notifier_read(EventNotifier *n)
3521 {
3522     VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3523 
3524     if (event_notifier_test_and_clear(n)) {
3525         virtio_notify_config(vdev);
3526     }
3527 }
3528 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3529                                                 bool with_irqfd)
3530 {
3531     if (assign && !with_irqfd) {
3532         event_notifier_set_handler(&vq->guest_notifier,
3533                                    virtio_queue_guest_notifier_read);
3534     } else {
3535         event_notifier_set_handler(&vq->guest_notifier, NULL);
3536     }
3537     if (!assign) {
3538         /* Test and clear notifier before closing it,
3539          * in case poll callback didn't have time to run. */
3540         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3541     }
3542 }
3543 
3544 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3545                                                  bool assign, bool with_irqfd)
3546 {
3547     EventNotifier *n;
3548     n = &vdev->config_notifier;
3549     if (assign && !with_irqfd) {
3550         event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3551     } else {
3552         event_notifier_set_handler(n, NULL);
3553     }
3554     if (!assign) {
3555         /* Test and clear notifier before closing it,*/
3556         /* in case poll callback didn't have time to run. */
3557         virtio_config_guest_notifier_read(n);
3558     }
3559 }
3560 
3561 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3562 {
3563     return &vq->guest_notifier;
3564 }
3565 
3566 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3567 {
3568     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3569 
3570     virtio_queue_set_notification(vq, 0);
3571 }
3572 
3573 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3574 {
3575     EventNotifier *n = opaque;
3576     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3577 
3578     return vq->vring.desc && !virtio_queue_empty(vq);
3579 }
3580 
3581 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3582 {
3583     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3584 
3585     virtio_queue_notify_vq(vq);
3586 }
3587 
3588 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3589 {
3590     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3591 
3592     /* Caller polls once more after this to catch requests that race with us */
3593     virtio_queue_set_notification(vq, 1);
3594 }
3595 
3596 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3597 {
3598     /*
3599      * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
3600      * Re-enable them.  (And if detach has not been used before, notifications
3601      * being enabled is still the default state while a notifier is attached;
3602      * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
3603      * notifications enabled once the polling section is left.)
3604      */
3605     if (!virtio_queue_get_notification(vq)) {
3606         virtio_queue_set_notification(vq, 1);
3607     }
3608 
3609     aio_set_event_notifier(ctx, &vq->host_notifier,
3610                            virtio_queue_host_notifier_read,
3611                            virtio_queue_host_notifier_aio_poll,
3612                            virtio_queue_host_notifier_aio_poll_ready);
3613     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3614                                 virtio_queue_host_notifier_aio_poll_begin,
3615                                 virtio_queue_host_notifier_aio_poll_end);
3616 
3617     /*
3618      * We will have ignored notifications about new requests from the guest
3619      * while no notifiers were attached, so "kick" the virt queue to process
3620      * those requests now.
3621      */
3622     event_notifier_set(&vq->host_notifier);
3623 }
3624 
3625 /*
3626  * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3627  * this for rx virtqueues and similar cases where the virtqueue handler
3628  * function does not pop all elements. When the virtqueue is left non-empty
3629  * polling consumes CPU cycles and should not be used.
3630  */
3631 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3632 {
3633     /* See virtio_queue_aio_attach_host_notifier() */
3634     if (!virtio_queue_get_notification(vq)) {
3635         virtio_queue_set_notification(vq, 1);
3636     }
3637 
3638     aio_set_event_notifier(ctx, &vq->host_notifier,
3639                            virtio_queue_host_notifier_read,
3640                            NULL, NULL);
3641 
3642     /*
3643      * See virtio_queue_aio_attach_host_notifier().
3644      * Note that this may be unnecessary for the type of virtqueues this
3645      * function is used for.  Still, it will not hurt to have a quick look into
3646      * whether we can/should process any of the virtqueue elements.
3647      */
3648     event_notifier_set(&vq->host_notifier);
3649 }
3650 
3651 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3652 {
3653     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
3654 
3655     /*
3656      * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
3657      * will run after io_poll_begin(), so by removing the notifier, we do not
3658      * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
3659      * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
3660      * notifications are enabled or disabled.  It does not really matter anyway;
3661      * we just removed the notifier, so we do not care about notifications until
3662      * we potentially re-attach it.  The attach_host_notifier functions will
3663      * ensure that notifications are enabled again when they are needed.
3664      */
3665 }
3666 
3667 void virtio_queue_host_notifier_read(EventNotifier *n)
3668 {
3669     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3670     if (event_notifier_test_and_clear(n)) {
3671         virtio_queue_notify_vq(vq);
3672     }
3673 }
3674 
3675 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3676 {
3677     return &vq->host_notifier;
3678 }
3679 
3680 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3681 {
3682     return &vdev->config_notifier;
3683 }
3684 
3685 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3686 {
3687     vq->host_notifier_enabled = enabled;
3688 }
3689 
3690 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3691                                       MemoryRegion *mr, bool assign)
3692 {
3693     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3694     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3695 
3696     if (k->set_host_notifier_mr) {
3697         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3698     }
3699 
3700     return -1;
3701 }
3702 
3703 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3704 {
3705     g_free(vdev->bus_name);
3706     vdev->bus_name = g_strdup(bus_name);
3707 }
3708 
3709 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3710 {
3711     va_list ap;
3712 
3713     va_start(ap, fmt);
3714     error_vreport(fmt, ap);
3715     va_end(ap);
3716 
3717     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3718         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3719         virtio_notify_config(vdev);
3720     }
3721 
3722     vdev->broken = true;
3723 }
3724 
3725 static void virtio_memory_listener_commit(MemoryListener *listener)
3726 {
3727     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3728     int i;
3729 
3730     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3731         if (vdev->vq[i].vring.num == 0) {
3732             break;
3733         }
3734         virtio_init_region_cache(vdev, i);
3735     }
3736 }
3737 
3738 static void virtio_device_realize(DeviceState *dev, Error **errp)
3739 {
3740     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3741     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3742     Error *err = NULL;
3743 
3744     /* Devices should either use vmsd or the load/save methods */
3745     assert(!vdc->vmsd || !vdc->load);
3746 
3747     if (vdc->realize != NULL) {
3748         vdc->realize(dev, &err);
3749         if (err != NULL) {
3750             error_propagate(errp, err);
3751             return;
3752         }
3753     }
3754 
3755     virtio_bus_device_plugged(vdev, &err);
3756     if (err != NULL) {
3757         error_propagate(errp, err);
3758         vdc->unrealize(dev);
3759         return;
3760     }
3761 
3762     vdev->listener.commit = virtio_memory_listener_commit;
3763     vdev->listener.name = "virtio";
3764     memory_listener_register(&vdev->listener, vdev->dma_as);
3765 }
3766 
3767 static void virtio_device_unrealize(DeviceState *dev)
3768 {
3769     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3770     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3771 
3772     memory_listener_unregister(&vdev->listener);
3773     virtio_bus_device_unplugged(vdev);
3774 
3775     if (vdc->unrealize != NULL) {
3776         vdc->unrealize(dev);
3777     }
3778 
3779     g_free(vdev->bus_name);
3780     vdev->bus_name = NULL;
3781 }
3782 
3783 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3784 {
3785     int i;
3786     if (!vdev->vq) {
3787         return;
3788     }
3789 
3790     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3791         if (vdev->vq[i].vring.num == 0) {
3792             break;
3793         }
3794         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3795     }
3796     g_free(vdev->vq);
3797 }
3798 
3799 static void virtio_device_instance_finalize(Object *obj)
3800 {
3801     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3802 
3803     virtio_device_free_virtqueues(vdev);
3804 
3805     g_free(vdev->config);
3806     g_free(vdev->vector_queues);
3807 }
3808 
/*
 * qdev properties common to every virtio device; installed on the
 * device class by virtio_device_class_init().
 *
 * Besides the common feature bits stored in host_features, three
 * boolean knobs are exposed: "use-started" and "use-disabled-flag"
 * (both default true) and "x-disable-legacy-check" (default false).
 */
static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
3817 
3818 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3819 {
3820     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3821     int i, n, r, err;
3822 
3823     /*
3824      * Batch all the host notifiers in a single transaction to avoid
3825      * quadratic time complexity in address_space_update_ioeventfds().
3826      */
3827     memory_region_transaction_begin();
3828     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3829         VirtQueue *vq = &vdev->vq[n];
3830         if (!virtio_queue_get_num(vdev, n)) {
3831             continue;
3832         }
3833         r = virtio_bus_set_host_notifier(qbus, n, true);
3834         if (r < 0) {
3835             err = r;
3836             goto assign_error;
3837         }
3838         event_notifier_set_handler(&vq->host_notifier,
3839                                    virtio_queue_host_notifier_read);
3840     }
3841 
3842     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3843         /* Kick right away to begin processing requests already in vring */
3844         VirtQueue *vq = &vdev->vq[n];
3845         if (!vq->vring.num) {
3846             continue;
3847         }
3848         event_notifier_set(&vq->host_notifier);
3849     }
3850     memory_region_transaction_commit();
3851     return 0;
3852 
3853 assign_error:
3854     i = n; /* save n for a second iteration after transaction is committed. */
3855     while (--n >= 0) {
3856         VirtQueue *vq = &vdev->vq[n];
3857         if (!virtio_queue_get_num(vdev, n)) {
3858             continue;
3859         }
3860 
3861         event_notifier_set_handler(&vq->host_notifier, NULL);
3862         r = virtio_bus_set_host_notifier(qbus, n, false);
3863         assert(r >= 0);
3864     }
3865     /*
3866      * The transaction expects the ioeventfds to be open when it
3867      * commits. Do it now, before the cleanup loop.
3868      */
3869     memory_region_transaction_commit();
3870 
3871     while (--i >= 0) {
3872         if (!virtio_queue_get_num(vdev, i)) {
3873             continue;
3874         }
3875         virtio_bus_cleanup_host_notifier(qbus, i);
3876     }
3877     return err;
3878 }
3879 
3880 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3881 {
3882     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3883     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3884 
3885     return virtio_bus_start_ioeventfd(vbus);
3886 }
3887 
3888 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3889 {
3890     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3891     int n, r;
3892 
3893     /*
3894      * Batch all the host notifiers in a single transaction to avoid
3895      * quadratic time complexity in address_space_update_ioeventfds().
3896      */
3897     memory_region_transaction_begin();
3898     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3899         VirtQueue *vq = &vdev->vq[n];
3900 
3901         if (!virtio_queue_get_num(vdev, n)) {
3902             continue;
3903         }
3904         event_notifier_set_handler(&vq->host_notifier, NULL);
3905         r = virtio_bus_set_host_notifier(qbus, n, false);
3906         assert(r >= 0);
3907     }
3908     /*
3909      * The transaction expects the ioeventfds to be open when it
3910      * commits. Do it now, before the cleanup loop.
3911      */
3912     memory_region_transaction_commit();
3913 
3914     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3915         if (!virtio_queue_get_num(vdev, n)) {
3916             continue;
3917         }
3918         virtio_bus_cleanup_host_notifier(qbus, n);
3919     }
3920 }
3921 
3922 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3923 {
3924     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3925     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3926 
3927     return virtio_bus_grab_ioeventfd(vbus);
3928 }
3929 
3930 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3931 {
3932     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3933     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3934 
3935     virtio_bus_release_ioeventfd(vbus);
3936 }
3937 
3938 static void virtio_device_class_init(ObjectClass *klass, void *data)
3939 {
3940     /* Set the default value here. */
3941     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3942     DeviceClass *dc = DEVICE_CLASS(klass);
3943 
3944     dc->realize = virtio_device_realize;
3945     dc->unrealize = virtio_device_unrealize;
3946     dc->bus_type = TYPE_VIRTIO_BUS;
3947     device_class_set_props(dc, virtio_properties);
3948     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3949     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3950 
3951     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3952 }
3953 
3954 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3955 {
3956     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3957     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3958 
3959     return virtio_bus_ioeventfd_enabled(vbus);
3960 }
3961 
3962 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
3963                                                  uint16_t queue,
3964                                                  Error **errp)
3965 {
3966     VirtIODevice *vdev;
3967     VirtQueueStatus *status;
3968 
3969     vdev = qmp_find_virtio_device(path);
3970     if (vdev == NULL) {
3971         error_setg(errp, "Path %s is not a VirtIODevice", path);
3972         return NULL;
3973     }
3974 
3975     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3976         error_setg(errp, "Invalid virtqueue number %d", queue);
3977         return NULL;
3978     }
3979 
3980     status = g_new0(VirtQueueStatus, 1);
3981     status->name = g_strdup(vdev->name);
3982     status->queue_index = vdev->vq[queue].queue_index;
3983     status->inuse = vdev->vq[queue].inuse;
3984     status->vring_num = vdev->vq[queue].vring.num;
3985     status->vring_num_default = vdev->vq[queue].vring.num_default;
3986     status->vring_align = vdev->vq[queue].vring.align;
3987     status->vring_desc = vdev->vq[queue].vring.desc;
3988     status->vring_avail = vdev->vq[queue].vring.avail;
3989     status->vring_used = vdev->vq[queue].vring.used;
3990     status->used_idx = vdev->vq[queue].used_idx;
3991     status->signalled_used = vdev->vq[queue].signalled_used;
3992     status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
3993 
3994     if (vdev->vhost_started) {
3995         VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3996         struct vhost_dev *hdev = vdc->get_vhost(vdev);
3997 
3998         /* check if vq index exists for vhost as well  */
3999         if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4000             status->has_last_avail_idx = true;
4001 
4002             int vhost_vq_index =
4003                 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4004             struct vhost_vring_state state = {
4005                 .index = vhost_vq_index,
4006             };
4007 
4008             status->last_avail_idx =
4009                 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4010         }
4011     } else {
4012         status->has_shadow_avail_idx = true;
4013         status->has_last_avail_idx = true;
4014         status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4015         status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4016     }
4017 
4018     return status;
4019 }
4020 
4021 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4022 {
4023     strList *list = NULL;
4024     strList *node;
4025     int i;
4026 
4027     struct {
4028         uint16_t flag;
4029         const char *value;
4030     } map[] = {
4031         { VRING_DESC_F_NEXT, "next" },
4032         { VRING_DESC_F_WRITE, "write" },
4033         { VRING_DESC_F_INDIRECT, "indirect" },
4034         { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4035         { 1 << VRING_PACKED_DESC_F_USED, "used" },
4036         { 0, "" }
4037     };
4038 
4039     for (i = 0; map[i].flag; i++) {
4040         if ((map[i].flag & flags) == 0) {
4041             continue;
4042         }
4043         node = g_malloc0(sizeof(strList));
4044         node->value = g_strdup(map[i].value);
4045         node->next = list;
4046         list = node;
4047     }
4048 
4049     return list;
4050 }
4051 
4052 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4053                                                      uint16_t queue,
4054                                                      bool has_index,
4055                                                      uint16_t index,
4056                                                      Error **errp)
4057 {
4058     VirtIODevice *vdev;
4059     VirtQueue *vq;
4060     VirtioQueueElement *element = NULL;
4061 
4062     vdev = qmp_find_virtio_device(path);
4063     if (vdev == NULL) {
4064         error_setg(errp, "Path %s is not a VirtIO device", path);
4065         return NULL;
4066     }
4067 
4068     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4069         error_setg(errp, "Invalid virtqueue number %d", queue);
4070         return NULL;
4071     }
4072     vq = &vdev->vq[queue];
4073 
4074     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4075         error_setg(errp, "Packed ring not supported");
4076         return NULL;
4077     } else {
4078         unsigned int head, i, max;
4079         VRingMemoryRegionCaches *caches;
4080         MemoryRegionCache indirect_desc_cache;
4081         MemoryRegionCache *desc_cache;
4082         VRingDesc desc;
4083         VirtioRingDescList *list = NULL;
4084         VirtioRingDescList *node;
4085         int rc; int ndescs;
4086 
4087         address_space_cache_init_empty(&indirect_desc_cache);
4088 
4089         RCU_READ_LOCK_GUARD();
4090 
4091         max = vq->vring.num;
4092 
4093         if (!has_index) {
4094             head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4095         } else {
4096             head = vring_avail_ring(vq, index % vq->vring.num);
4097         }
4098         i = head;
4099 
4100         caches = vring_get_region_caches(vq);
4101         if (!caches) {
4102             error_setg(errp, "Region caches not initialized");
4103             return NULL;
4104         }
4105         if (caches->desc.len < max * sizeof(VRingDesc)) {
4106             error_setg(errp, "Cannot map descriptor ring");
4107             return NULL;
4108         }
4109 
4110         desc_cache = &caches->desc;
4111         vring_split_desc_read(vdev, &desc, desc_cache, i);
4112         if (desc.flags & VRING_DESC_F_INDIRECT) {
4113             int64_t len;
4114             len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4115                                            desc.addr, desc.len, false);
4116             desc_cache = &indirect_desc_cache;
4117             if (len < desc.len) {
4118                 error_setg(errp, "Cannot map indirect buffer");
4119                 goto done;
4120             }
4121 
4122             max = desc.len / sizeof(VRingDesc);
4123             i = 0;
4124             vring_split_desc_read(vdev, &desc, desc_cache, i);
4125         }
4126 
4127         element = g_new0(VirtioQueueElement, 1);
4128         element->avail = g_new0(VirtioRingAvail, 1);
4129         element->used = g_new0(VirtioRingUsed, 1);
4130         element->name = g_strdup(vdev->name);
4131         element->index = head;
4132         element->avail->flags = vring_avail_flags(vq);
4133         element->avail->idx = vring_avail_idx(vq);
4134         element->avail->ring = head;
4135         element->used->flags = vring_used_flags(vq);
4136         element->used->idx = vring_used_idx(vq);
4137         ndescs = 0;
4138 
4139         do {
4140             /* A buggy driver may produce an infinite loop */
4141             if (ndescs >= max) {
4142                 break;
4143             }
4144             node = g_new0(VirtioRingDescList, 1);
4145             node->value = g_new0(VirtioRingDesc, 1);
4146             node->value->addr = desc.addr;
4147             node->value->len = desc.len;
4148             node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4149             node->next = list;
4150             list = node;
4151 
4152             ndescs++;
4153             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
4154         } while (rc == VIRTQUEUE_READ_DESC_MORE);
4155         element->descs = list;
4156 done:
4157         address_space_cache_destroy(&indirect_desc_cache);
4158     }
4159 
4160     return element;
4161 }
4162 
4163 static const TypeInfo virtio_device_info = {
4164     .name = TYPE_VIRTIO_DEVICE,
4165     .parent = TYPE_DEVICE,
4166     .instance_size = sizeof(VirtIODevice),
4167     .class_init = virtio_device_class_init,
4168     .instance_finalize = virtio_device_instance_finalize,
4169     .abstract = true,
4170     .class_size = sizeof(VirtioDeviceClass),
4171 };
4172 
/* Register the virtio device base type described by virtio_device_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

/* Hook the registration function into QEMU's type_init mechanism. */
type_init(virtio_register_types)
4179