xref: /openbmc/qemu/hw/virtio/virtio.c (revision e5859141b9b6aec9e0a14dacedc9f02fe2f15844)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qapi-commands-virtio.h"
17 #include "trace.h"
18 #include "qemu/defer-call.h"
19 #include "qemu/error-report.h"
20 #include "qemu/log.h"
21 #include "qemu/main-loop.h"
22 #include "qemu/module.h"
23 #include "qemu/target-info.h"
24 #include "qom/object_interfaces.h"
25 #include "hw/core/cpu.h"
26 #include "hw/virtio/virtio.h"
27 #include "hw/virtio/vhost.h"
28 #include "migration/qemu-file-types.h"
29 #include "qemu/atomic.h"
30 #include "hw/virtio/virtio-bus.h"
31 #include "hw/qdev-properties.h"
32 #include "hw/virtio/virtio-access.h"
33 #include "system/dma.h"
34 #include "system/runstate.h"
35 #include "virtio-qmp.h"
36 
37 #include "standard-headers/linux/virtio_ids.h"
38 #include "standard-headers/linux/vhost_types.h"
39 #include "standard-headers/linux/virtio_blk.h"
40 #include "standard-headers/linux/virtio_console.h"
41 #include "standard-headers/linux/virtio_gpu.h"
42 #include "standard-headers/linux/virtio_net.h"
43 #include "standard-headers/linux/virtio_scsi.h"
44 #include "standard-headers/linux/virtio_i2c.h"
45 #include "standard-headers/linux/virtio_balloon.h"
46 #include "standard-headers/linux/virtio_iommu.h"
47 #include "standard-headers/linux/virtio_mem.h"
48 #include "standard-headers/linux/virtio_vsock.h"
49 
50 /*
51  * Maximum size of virtio device config space
52  */
53 #define VHOST_USER_MAX_CONFIG_SIZE 256
54 
55 /*
56  * The alignment to use between consumer and producer parts of vring.
57  * x86 pagesize. This is the default, used by transports like PCI
58  * which don't provide a means for the guest to tell the host the alignment.
59  */
60 #define VIRTIO_PCI_VRING_ALIGN         4096
61 
62 typedef struct VRingDesc
63 {
64     uint64_t addr;
65     uint32_t len;
66     uint16_t flags;
67     uint16_t next;
68 } VRingDesc;
69 
70 typedef struct VRingPackedDesc {
71     uint64_t addr;
72     uint32_t len;
73     uint16_t id;
74     uint16_t flags;
75 } VRingPackedDesc;
76 
77 typedef struct VRingAvail
78 {
79     uint16_t flags;
80     uint16_t idx;
81     uint16_t ring[];
82 } VRingAvail;
83 
84 typedef struct VRingUsedElem
85 {
86     uint32_t id;
87     uint32_t len;
88 } VRingUsedElem;
89 
90 typedef struct VRingUsed
91 {
92     uint16_t flags;
93     uint16_t idx;
94     VRingUsedElem ring[];
95 } VRingUsed;
96 
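/*
 * Illustrative sketch (kept out of the build): the byte sizes of the three
 * split-ring areas for a queue of 'num' entries, matching what the
 * virtio_queue_get_desc/avail/used_size() helpers compute for split rings
 * when VIRTIO_RING_F_EVENT_IDX is negotiated.  split_ring_sizes() is an
 * example name, not a QEMU helper.
 */
#if 0
static void split_ring_sizes(unsigned num, size_t *desc_sz,
                             size_t *avail_sz, size_t *used_sz)
{
    *desc_sz = num * sizeof(VRingDesc);     /* 16 bytes per descriptor */
    /*
     * The trailing uint16_t is used_event/avail_event and is only present
     * when VIRTIO_RING_F_EVENT_IDX has been negotiated.
     */
    *avail_sz = offsetof(VRingAvail, ring[num]) + sizeof(uint16_t);
    *used_sz = offsetof(VRingUsed, ring[num]) + sizeof(uint16_t);
}
#endif
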
97 typedef struct VRingMemoryRegionCaches {
98     struct rcu_head rcu;
99     MemoryRegionCache desc;
100     MemoryRegionCache avail;
101     MemoryRegionCache used;
102 } VRingMemoryRegionCaches;
103 
104 typedef struct VRing
105 {
106     unsigned int num;
107     unsigned int num_default;
108     unsigned int align;
109     hwaddr desc;
110     hwaddr avail;
111     hwaddr used;
112     VRingMemoryRegionCaches *caches;
113 } VRing;
114 
115 typedef struct VRingPackedDescEvent {
116     uint16_t off_wrap;
117     uint16_t flags;
118 } VRingPackedDescEvent;
119 
120 struct VirtQueue
121 {
122     VRing vring;
123     VirtQueueElement *used_elems;
124 
125     /* Next head to pop */
126     uint16_t last_avail_idx;
127     bool last_avail_wrap_counter;
128 
129     /* Last avail_idx read from VQ. */
130     uint16_t shadow_avail_idx;
131     bool shadow_avail_wrap_counter;
132 
133     uint16_t used_idx;
134     bool used_wrap_counter;
135 
136     /* Last used index value we have signalled on */
137     uint16_t signalled_used;
138 
139     /* True if signalled_used is valid */
140     bool signalled_used_valid;
141 
142     /* Notification enabled? */
143     bool notification;
144 
145     uint16_t queue_index;
146 
147     unsigned int inuse;
148 
149     uint16_t vector;
150     VirtIOHandleOutput handle_output;
151     VirtIODevice *vdev;
152     EventNotifier guest_notifier;
153     EventNotifier host_notifier;
154     bool host_notifier_enabled;
155     QLIST_ENTRY(VirtQueue) node;
156 };
157 
158 const char *virtio_device_names[] = {
159     [VIRTIO_ID_NET] = "virtio-net",
160     [VIRTIO_ID_BLOCK] = "virtio-blk",
161     [VIRTIO_ID_CONSOLE] = "virtio-serial",
162     [VIRTIO_ID_RNG] = "virtio-rng",
163     [VIRTIO_ID_BALLOON] = "virtio-balloon",
164     [VIRTIO_ID_IOMEM] = "virtio-iomem",
165     [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
166     [VIRTIO_ID_SCSI] = "virtio-scsi",
167     [VIRTIO_ID_9P] = "virtio-9p",
168     [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
169     [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
170     [VIRTIO_ID_CAIF] = "virtio-caif",
171     [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
172     [VIRTIO_ID_GPU] = "virtio-gpu",
173     [VIRTIO_ID_CLOCK] = "virtio-clk",
174     [VIRTIO_ID_INPUT] = "virtio-input",
175     [VIRTIO_ID_VSOCK] = "vhost-vsock",
176     [VIRTIO_ID_CRYPTO] = "virtio-crypto",
177     [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
178     [VIRTIO_ID_PSTORE] = "virtio-pstore",
179     [VIRTIO_ID_IOMMU] = "virtio-iommu",
180     [VIRTIO_ID_MEM] = "virtio-mem",
181     [VIRTIO_ID_SOUND] = "virtio-sound",
182     [VIRTIO_ID_FS] = "virtio-user-fs",
183     [VIRTIO_ID_PMEM] = "virtio-pmem",
184     [VIRTIO_ID_RPMB] = "virtio-rpmb",
185     [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
186     [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
187     [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
188     [VIRTIO_ID_SCMI] = "virtio-scmi",
189     [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
190     [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
191     [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
192     [VIRTIO_ID_CAN] = "virtio-can",
193     [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
194     [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
195     [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
196     [VIRTIO_ID_BT] = "virtio-bluetooth",
197     [VIRTIO_ID_GPIO] = "virtio-gpio"
198 };
199 
200 static const char *virtio_id_to_name(uint16_t device_id)
201 {
202     assert(device_id < G_N_ELEMENTS(virtio_device_names));
203     const char *name = virtio_device_names[device_id];
204     assert(name != NULL);
205     return name;
206 }
207 
208 static void virtio_check_indirect_feature(VirtIODevice *vdev)
209 {
210     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
211         qemu_log_mask(LOG_GUEST_ERROR,
212                       "Device %s: indirect_desc was not negotiated!\n",
213                       vdev->name);
214     }
215 }
216 
217 /* Called within call_rcu().  */
218 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
219 {
220     assert(caches != NULL);
221     address_space_cache_destroy(&caches->desc);
222     address_space_cache_destroy(&caches->avail);
223     address_space_cache_destroy(&caches->used);
224     g_free(caches);
225 }
226 
227 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
228 {
229     VRingMemoryRegionCaches *caches;
230 
231     caches = qatomic_read(&vq->vring.caches);
232     qatomic_rcu_set(&vq->vring.caches, NULL);
233     if (caches) {
234         call_rcu(caches, virtio_free_region_cache, rcu);
235     }
236 }
237 
238 void virtio_init_region_cache(VirtIODevice *vdev, int n)
239 {
240     VirtQueue *vq = &vdev->vq[n];
241     VRingMemoryRegionCaches *old = vq->vring.caches;
242     VRingMemoryRegionCaches *new = NULL;
243     hwaddr addr, size;
244     int64_t len;
245     bool packed;
246 
247 
248     addr = vq->vring.desc;
249     if (!addr) {
250         goto out_no_cache;
251     }
252     new = g_new0(VRingMemoryRegionCaches, 1);
253     size = virtio_queue_get_desc_size(vdev, n);
254     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
255                                    true : false;
256     len = address_space_cache_init(&new->desc, vdev->dma_as,
257                                    addr, size, packed);
258     if (len < size) {
259         virtio_error(vdev, "Cannot map desc");
260         goto err_desc;
261     }
262 
263     size = virtio_queue_get_used_size(vdev, n);
264     len = address_space_cache_init(&new->used, vdev->dma_as,
265                                    vq->vring.used, size, true);
266     if (len < size) {
267         virtio_error(vdev, "Cannot map used");
268         goto err_used;
269     }
270 
271     size = virtio_queue_get_avail_size(vdev, n);
272     len = address_space_cache_init(&new->avail, vdev->dma_as,
273                                    vq->vring.avail, size, false);
274     if (len < size) {
275         virtio_error(vdev, "Cannot map avail");
276         goto err_avail;
277     }
278 
279     qatomic_rcu_set(&vq->vring.caches, new);
280     if (old) {
281         call_rcu(old, virtio_free_region_cache, rcu);
282     }
283     return;
284 
285 err_avail:
286     address_space_cache_destroy(&new->avail);
287 err_used:
288     address_space_cache_destroy(&new->used);
289 err_desc:
290     address_space_cache_destroy(&new->desc);
291 out_no_cache:
292     g_free(new);
293     virtio_virtqueue_reset_region_cache(vq);
294 }
295 
296 /* virt queue functions */
297 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
298 {
299     VRing *vring = &vdev->vq[n].vring;
300 
301     if (!vring->num || !vring->desc || !vring->align) {
302         /* not yet setup -> nothing to do */
303         return;
304     }
305     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
306     vring->used = vring_align(vring->avail +
307                               offsetof(VRingAvail, ring[vring->num]),
308                               vring->align);
309     virtio_init_region_cache(vdev, n);
310 }
311 
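/*
 * Illustrative sketch (kept out of the build): the layout computed above,
 * with concrete numbers for a 256-entry split queue whose descriptor table
 * sits at guest address 0x100000, using the default 4096-byte alignment.
 * align_up() stands in for vring_align(); example_split_layout() is not a
 * QEMU function.
 */
#if 0
static uint64_t align_up(uint64_t addr, uint64_t align)
{
    return (addr + align - 1) & ~(align - 1);
}

static void example_split_layout(void)
{
    const unsigned num = 256;
    uint64_t desc = 0x100000;
    uint64_t avail = desc + num * sizeof(VRingDesc);   /* 0x100000 + 0x1000 = 0x101000 */
    uint64_t used = align_up(avail + offsetof(VRingAvail, ring[num]),
                             VIRTIO_PCI_VRING_ALIGN);  /* 0x101204 rounded up to 0x102000 */
    (void)used;
}
#endif
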
312 /* Called within rcu_read_lock().  */
313 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
314                                   MemoryRegionCache *cache, int i)
315 {
316     address_space_read_cached(cache, i * sizeof(VRingDesc),
317                               desc, sizeof(VRingDesc));
318     virtio_tswap64s(vdev, &desc->addr);
319     virtio_tswap32s(vdev, &desc->len);
320     virtio_tswap16s(vdev, &desc->flags);
321     virtio_tswap16s(vdev, &desc->next);
322 }
323 
324 static void vring_packed_event_read(VirtIODevice *vdev,
325                                     MemoryRegionCache *cache,
326                                     VRingPackedDescEvent *e)
327 {
328     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
329     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
330 
331     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
332     /* Make sure flags is seen before off_wrap */
333     smp_rmb();
334     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
335 }
336 
337 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
338                                         MemoryRegionCache *cache,
339                                         uint16_t off_wrap)
340 {
341     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
342 
343     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
344     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
345 }
346 
347 static void vring_packed_flags_write(VirtIODevice *vdev,
348                                      MemoryRegionCache *cache, uint16_t flags)
349 {
350     hwaddr off = offsetof(VRingPackedDescEvent, flags);
351 
352     virtio_stw_phys_cached(vdev, cache, off, flags);
353     address_space_cache_invalidate(cache, off, sizeof(flags));
354 }
355 
356 /* Called within rcu_read_lock().  */
357 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
358 {
359     return qatomic_rcu_read(&vq->vring.caches);
360 }
361 
362 /* Called within rcu_read_lock().  */
363 static inline uint16_t vring_avail_flags(VirtQueue *vq)
364 {
365     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
366     hwaddr pa = offsetof(VRingAvail, flags);
367 
368     if (!caches) {
369         return 0;
370     }
371 
372     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
373 }
374 
375 /* Called within rcu_read_lock().  */
376 static inline uint16_t vring_avail_idx(VirtQueue *vq)
377 {
378     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
379     hwaddr pa = offsetof(VRingAvail, idx);
380 
381     if (!caches) {
382         return 0;
383     }
384 
385     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
386     return vq->shadow_avail_idx;
387 }
388 
389 /* Called within rcu_read_lock().  */
390 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
391 {
392     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393     hwaddr pa = offsetof(VRingAvail, ring[i]);
394 
395     if (!caches) {
396         return 0;
397     }
398 
399     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
400 }
401 
402 /* Called within rcu_read_lock().  */
403 static inline uint16_t vring_get_used_event(VirtQueue *vq)
404 {
405     return vring_avail_ring(vq, vq->vring.num);
406 }
407 
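/*
 * Illustrative sketch (kept out of the build): with VIRTIO_RING_F_EVENT_IDX,
 * the used_event value returned above tells the device when the driver wants
 * an interrupt.  This is the vring_need_event() check from the VIRTIO spec;
 * device_should_interrupt() is an example name, not a QEMU helper.
 */
#if 0
static bool device_should_interrupt(uint16_t used_event,
                                    uint16_t new_idx, uint16_t old_idx)
{
    /* True if the used index crossed used_event while moving old -> new. */
    return (uint16_t)(new_idx - used_event - 1) < (uint16_t)(new_idx - old_idx);
}
#endif
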
408 /* Called within rcu_read_lock().  */
409 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
410                                     int i)
411 {
412     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
413     hwaddr pa = offsetof(VRingUsed, ring[i]);
414 
415     if (!caches) {
416         return;
417     }
418 
419     virtio_tswap32s(vq->vdev, &uelem->id);
420     virtio_tswap32s(vq->vdev, &uelem->len);
421     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
422     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
423 }
424 
425 /* Called within rcu_read_lock(). */
426 static inline uint16_t vring_used_flags(VirtQueue *vq)
427 {
428     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
429     hwaddr pa = offsetof(VRingUsed, flags);
430 
431     if (!caches) {
432         return 0;
433     }
434 
435     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
436 }
437 
438 /* Called within rcu_read_lock().  */
439 static uint16_t vring_used_idx(VirtQueue *vq)
440 {
441     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
442     hwaddr pa = offsetof(VRingUsed, idx);
443 
444     if (!caches) {
445         return 0;
446     }
447 
448     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
449 }
450 
451 /* Called within rcu_read_lock().  */
452 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
453 {
454     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
455     hwaddr pa = offsetof(VRingUsed, idx);
456 
457     if (caches) {
458         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
459         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
460     }
461 
462     vq->used_idx = val;
463 }
464 
465 /* Called within rcu_read_lock().  */
466 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
467 {
468     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
469     VirtIODevice *vdev = vq->vdev;
470     hwaddr pa = offsetof(VRingUsed, flags);
471     uint16_t flags;
472 
473     if (!caches) {
474         return;
475     }
476 
477     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
478     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
479     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
480 }
481 
482 /* Called within rcu_read_lock().  */
483 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
484 {
485     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
486     VirtIODevice *vdev = vq->vdev;
487     hwaddr pa = offsetof(VRingUsed, flags);
488     uint16_t flags;
489 
490     if (!caches) {
491         return;
492     }
493 
494     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
495     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
496     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
497 }
498 
499 /* Called within rcu_read_lock().  */
500 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
501 {
502     VRingMemoryRegionCaches *caches;
503     hwaddr pa;
504     if (!vq->notification) {
505         return;
506     }
507 
508     caches = vring_get_region_caches(vq);
509     if (!caches) {
510         return;
511     }
512 
513     pa = offsetof(VRingUsed, ring[vq->vring.num]);
514     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
515     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
516 }
517 
518 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
519 {
520     RCU_READ_LOCK_GUARD();
521 
522     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
523         vring_set_avail_event(vq, vring_avail_idx(vq));
524     } else if (enable) {
525         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
526     } else {
527         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
528     }
529     if (enable) {
530         /* Expose avail event/used flags before caller checks the avail idx. */
531         smp_mb();
532     }
533 }
534 
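/*
 * Illustrative sketch (kept out of the build): the driver-side counterpart
 * of the suppression state written above, as defined by the VIRTIO spec.
 * Before kicking, a driver checks VRING_USED_F_NO_NOTIFY, or, with
 * VIRTIO_RING_F_EVENT_IDX, compares against the avail event the device
 * stored after the used ring (see vring_set_avail_event() above).
 * driver_should_kick() is an example name, not a QEMU helper.
 */
#if 0
static bool driver_should_kick(bool event_idx, uint16_t used_flags,
                               uint16_t avail_event,
                               uint16_t new_idx, uint16_t old_idx)
{
    if (!event_idx) {
        return !(used_flags & VRING_USED_F_NO_NOTIFY);
    }
    /* vring_need_event() from the spec */
    return (uint16_t)(new_idx - avail_event - 1) < (uint16_t)(new_idx - old_idx);
}
#endif
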
535 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
536 {
537     uint16_t off_wrap;
538     VRingPackedDescEvent e;
539     VRingMemoryRegionCaches *caches;
540 
541     RCU_READ_LOCK_GUARD();
542     caches = vring_get_region_caches(vq);
543     if (!caches) {
544         return;
545     }
546 
547     vring_packed_event_read(vq->vdev, &caches->used, &e);
548 
549     if (!enable) {
550         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
551     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
552         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
553         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
554         /* Make sure off_wrap is written before flags */
555         smp_wmb();
556         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
557     } else {
558         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
559     }
560 
561     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
562     if (enable) {
563         /* Expose avail event/used flags before caller checks the avail idx. */
564         smp_mb();
565     }
566 }
567 
568 bool virtio_queue_get_notification(VirtQueue *vq)
569 {
570     return vq->notification;
571 }
572 
573 void virtio_queue_set_notification(VirtQueue *vq, int enable)
574 {
575     vq->notification = enable;
576 
577     if (!vq->vring.desc) {
578         return;
579     }
580 
581     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
582         virtio_queue_packed_set_notification(vq, enable);
583     } else {
584         virtio_queue_split_set_notification(vq, enable);
585     }
586 }
587 
588 int virtio_queue_ready(VirtQueue *vq)
589 {
590     return vq->vring.avail != 0;
591 }
592 
593 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
594                                          uint16_t *flags,
595                                          MemoryRegionCache *cache,
596                                          int i)
597 {
598     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
599 
600     *flags = virtio_lduw_phys_cached(vdev, cache, off);
601 }
602 
603 static void vring_packed_desc_read(VirtIODevice *vdev,
604                                    VRingPackedDesc *desc,
605                                    MemoryRegionCache *cache,
606                                    int i, bool strict_order)
607 {
608     hwaddr off = i * sizeof(VRingPackedDesc);
609 
610     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
611 
612     if (strict_order) {
613         /* Make sure flags is read before the rest of the fields. */
614         smp_rmb();
615     }
616 
617     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
618                               &desc->addr, sizeof(desc->addr));
619     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
620                               &desc->id, sizeof(desc->id));
621     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
622                               &desc->len, sizeof(desc->len));
623     virtio_tswap64s(vdev, &desc->addr);
624     virtio_tswap16s(vdev, &desc->id);
625     virtio_tswap32s(vdev, &desc->len);
626 }
627 
628 static void vring_packed_desc_write_data(VirtIODevice *vdev,
629                                          VRingPackedDesc *desc,
630                                          MemoryRegionCache *cache,
631                                          int i)
632 {
633     hwaddr off_id = i * sizeof(VRingPackedDesc) +
634                     offsetof(VRingPackedDesc, id);
635     hwaddr off_len = i * sizeof(VRingPackedDesc) +
636                     offsetof(VRingPackedDesc, len);
637 
638     virtio_tswap32s(vdev, &desc->len);
639     virtio_tswap16s(vdev, &desc->id);
640     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
641     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
642     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
643     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
644 }
645 
646 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
647                                           VRingPackedDesc *desc,
648                                           MemoryRegionCache *cache,
649                                           int i)
650 {
651     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
652 
653     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
654     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
655 }
656 
657 static void vring_packed_desc_write(VirtIODevice *vdev,
658                                     VRingPackedDesc *desc,
659                                     MemoryRegionCache *cache,
660                                     int i, bool strict_order)
661 {
662     vring_packed_desc_write_data(vdev, desc, cache, i);
663     if (strict_order) {
664         /* Make sure data is written before flags. */
665         smp_wmb();
666     }
667     vring_packed_desc_write_flags(vdev, desc, cache, i);
668 }
669 
670 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
671 {
672     bool avail, used;
673 
674     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
675     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
676     return (avail != used) && (avail == wrap_counter);
677 }
678 
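/*
 * Illustrative sketch (kept out of the build): the flag states accepted by
 * is_desc_avail() above.  While the driver's wrap counter is 1, a descriptor
 * is available as AVAIL=1/USED=0; once the driver wraps and flips both bits,
 * AVAIL=0/USED=1 becomes the available state for wrap counter 0.
 */
#if 0
static void packed_avail_examples(void)
{
    uint16_t first_pass = 1 << VRING_PACKED_DESC_F_AVAIL;   /* AVAIL=1, USED=0 */
    uint16_t second_pass = 1 << VRING_PACKED_DESC_F_USED;   /* AVAIL=0, USED=1 */

    assert(is_desc_avail(first_pass, true));
    assert(!is_desc_avail(first_pass, false));
    assert(is_desc_avail(second_pass, false));
    assert(!is_desc_avail(second_pass, true));
}
#endif
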
679 /* Fetch avail_idx from VQ memory only when we really need to know if
680  * the guest has added some buffers.
681  * Called within rcu_read_lock().  */
682 static int virtio_queue_empty_rcu(VirtQueue *vq)
683 {
684     if (virtio_device_disabled(vq->vdev)) {
685         return 1;
686     }
687 
688     if (unlikely(!vq->vring.avail)) {
689         return 1;
690     }
691 
692     if (vq->shadow_avail_idx != vq->last_avail_idx) {
693         return 0;
694     }
695 
696     return vring_avail_idx(vq) == vq->last_avail_idx;
697 }
698 
699 static int virtio_queue_split_empty(VirtQueue *vq)
700 {
701     bool empty;
702 
703     if (virtio_device_disabled(vq->vdev)) {
704         return 1;
705     }
706 
707     if (unlikely(!vq->vring.avail)) {
708         return 1;
709     }
710 
711     if (vq->shadow_avail_idx != vq->last_avail_idx) {
712         return 0;
713     }
714 
715     RCU_READ_LOCK_GUARD();
716     empty = vring_avail_idx(vq) == vq->last_avail_idx;
717     return empty;
718 }
719 
720 /* Called within rcu_read_lock().  */
721 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
722 {
723     struct VRingPackedDesc desc;
724     VRingMemoryRegionCaches *cache;
725 
726     if (unlikely(!vq->vring.desc)) {
727         return 1;
728     }
729 
730     cache = vring_get_region_caches(vq);
731     if (!cache) {
732         return 1;
733     }
734 
735     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
736                                  vq->last_avail_idx);
737 
738     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
739 }
740 
741 static int virtio_queue_packed_empty(VirtQueue *vq)
742 {
743     RCU_READ_LOCK_GUARD();
744     return virtio_queue_packed_empty_rcu(vq);
745 }
746 
747 int virtio_queue_empty(VirtQueue *vq)
748 {
749     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
750         return virtio_queue_packed_empty(vq);
751     } else {
752         return virtio_queue_split_empty(vq);
753     }
754 }
755 
756 static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
757 {
758     if (unlikely(!vq->vring.avail)) {
759         return false;
760     }
761 
762     return (uint16_t)shadow_idx != vring_avail_idx(vq);
763 }
764 
765 static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
766 {
767     VRingPackedDesc desc;
768     VRingMemoryRegionCaches *caches;
769 
770     if (unlikely(!vq->vring.desc)) {
771         return false;
772     }
773 
774     caches = vring_get_region_caches(vq);
775     if (!caches) {
776         return false;
777     }
778 
779     vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
780                            shadow_idx, true);
781 
782     return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
783 }
784 
785 static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
786 {
787     if (virtio_device_disabled(vq->vdev)) {
788         return false;
789     }
790 
791     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
792         return virtio_queue_packed_poll(vq, shadow_idx);
793     } else {
794         return virtio_queue_split_poll(vq, shadow_idx);
795     }
796 }
797 
798 bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
799                                                 int opaque)
800 {
801     virtio_queue_set_notification(vq, 1);
802 
803     if (opaque >= 0) {
804         return virtio_queue_poll(vq, (unsigned)opaque);
805     } else {
806         return false;
807     }
808 }
809 
810 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
811                                unsigned int len)
812 {
813     AddressSpace *dma_as = vq->vdev->dma_as;
814     unsigned int offset;
815     int i;
816 
817     offset = 0;
818     for (i = 0; i < elem->in_num; i++) {
819         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
820 
821         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
822                          elem->in_sg[i].iov_len,
823                          DMA_DIRECTION_FROM_DEVICE, size);
824 
825         offset += size;
826     }
827 
828     for (i = 0; i < elem->out_num; i++)
829         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
830                          elem->out_sg[i].iov_len,
831                          DMA_DIRECTION_TO_DEVICE,
832                          elem->out_sg[i].iov_len);
833 }
834 
835 /* virtqueue_detach_element:
836  * @vq: The #VirtQueue
837  * @elem: The #VirtQueueElement
838  * @len: number of bytes written
839  *
840  * Detach the element from the virtqueue.  This function is suitable for device
841  * reset or other situations where a #VirtQueueElement is simply freed and will
842  * not be pushed or discarded.
843  */
844 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
845                               unsigned int len)
846 {
847     vq->inuse -= elem->ndescs;
848     virtqueue_unmap_sg(vq, elem, len);
849 }
850 
851 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
852 {
853     vq->last_avail_idx -= num;
854 }
855 
856 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
857 {
858     if (vq->last_avail_idx < num) {
859         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
860         vq->last_avail_wrap_counter ^= 1;
861     } else {
862         vq->last_avail_idx -= num;
863     }
864 }
865 
866 /* virtqueue_unpop:
867  * @vq: The #VirtQueue
868  * @elem: The #VirtQueueElement
869  * @len: number of bytes written
870  *
871  * Pretend the most recent element wasn't popped from the virtqueue.  The next
872  * call to virtqueue_pop() will refetch the element.
873  */
874 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
875                      unsigned int len)
876 {
877 
878     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
879         virtqueue_packed_rewind(vq, 1);
880     } else {
881         virtqueue_split_rewind(vq, 1);
882     }
883 
884     virtqueue_detach_element(vq, elem, len);
885 }
886 
887 /* virtqueue_rewind:
888  * @vq: The #VirtQueue
889  * @num: Number of elements to push back
890  *
891  * Pretend that elements weren't popped from the virtqueue.  The next
892  * virtqueue_pop() will refetch the oldest element.
893  *
894  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
895  *
896  * Returns: true on success, false if @num is greater than the number of in use
897  * elements.
898  */
899 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
900 {
901     if (num > vq->inuse) {
902         return false;
903     }
904 
905     vq->inuse -= num;
906     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
907         virtqueue_packed_rewind(vq, num);
908     } else {
909         virtqueue_split_rewind(vq, num);
910     }
911     return true;
912 }
913 
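/*
 * Illustrative sketch (kept out of the build): a hypothetical device that
 * pops an element, finds it cannot handle it yet, and returns it with
 * virtqueue_unpop() so the next pop refetches the same element.
 * device_can_process() is a placeholder for device-specific state;
 * virtqueue_pop() and virtqueue_push() are declared in hw/virtio/virtio.h.
 */
#if 0
static void try_process_one(VirtQueue *vq)
{
    VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        return;                 /* nothing queued */
    }
    if (!device_can_process()) {
        virtqueue_unpop(vq, elem, 0);
        g_free(elem);           /* the next pop allocates a fresh element */
        return;
    }
    /* ... consume elem->out_sg ... */
    virtqueue_push(vq, elem, 0);    /* 0 = no bytes written back */
    g_free(elem);
}
#endif
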
914 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len, unsigned int idx)
916 {
917     VRingUsedElem uelem;
918 
919     if (unlikely(!vq->vring.used)) {
920         return;
921     }
922 
923     idx = (idx + vq->used_idx) % vq->vring.num;
924 
925     uelem.id = elem->index;
926     uelem.len = len;
927     vring_used_write(vq, &uelem, idx);
928 }
929 
930 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
931                                   unsigned int len, unsigned int idx)
932 {
933     vq->used_elems[idx].index = elem->index;
934     vq->used_elems[idx].len = len;
935     vq->used_elems[idx].ndescs = elem->ndescs;
936 }
937 
938 static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem,
939                                    unsigned int len)
940 {
941     unsigned int i, steps, max_steps, ndescs;
942 
943     i = vq->used_idx % vq->vring.num;
944     steps = 0;
945     /*
946      * We should never need to advance 'i' by as much as the distance
947      * between used_idx and last_avail_idx (max_steps).
948      */
949     max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num;
950 
951     /* Search for element in vq->used_elems */
952     while (steps < max_steps) {
953         /* Found element, set length and mark as filled */
954         if (vq->used_elems[i].index == elem->index) {
955             vq->used_elems[i].len = len;
956             vq->used_elems[i].in_order_filled = true;
957             break;
958         }
959 
960         ndescs = vq->used_elems[i].ndescs;
961 
962         /* Defensive sanity check */
963         if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) {
964             qemu_log_mask(LOG_GUEST_ERROR,
965                           "%s: %s invalid ndescs %u at position %u\n",
966                           __func__, vq->vdev->name, ndescs, i);
967             return;
968         }
969 
970         i += ndescs;
971         steps += ndescs;
972 
973         if (i >= vq->vring.num) {
974             i -= vq->vring.num;
975         }
976     }
977 
978     /*
979      * We should be able to find a matching VirtQueueElement in
980      * used_elems. If we don't, this is an error.
981      */
982     if (steps >= max_steps) {
983         qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n",
984                       __func__, vq->vdev->name, elem->index);
985     }
986 }
987 
988 static void virtqueue_packed_fill_desc(VirtQueue *vq,
989                                        const VirtQueueElement *elem,
990                                        unsigned int idx,
991                                        bool strict_order)
992 {
993     uint16_t head;
994     VRingMemoryRegionCaches *caches;
995     VRingPackedDesc desc = {
996         .id = elem->index,
997         .len = elem->len,
998     };
999     bool wrap_counter = vq->used_wrap_counter;
1000 
1001     if (unlikely(!vq->vring.desc)) {
1002         return;
1003     }
1004 
1005     head = vq->used_idx + idx;
1006     if (head >= vq->vring.num) {
1007         head -= vq->vring.num;
1008         wrap_counter ^= 1;
1009     }
1010     if (wrap_counter) {
1011         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
1012         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
1013     } else {
1014         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
1015         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
1016     }
1017 
1018     caches = vring_get_region_caches(vq);
1019     if (!caches) {
1020         return;
1021     }
1022 
1023     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
1024 }
1025 
1026 /* Called within rcu_read_lock().  */
1027 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
1028                     unsigned int len, unsigned int idx)
1029 {
1030     trace_virtqueue_fill(vq, elem, len, idx);
1031 
1032     virtqueue_unmap_sg(vq, elem, len);
1033 
1034     if (virtio_device_disabled(vq->vdev)) {
1035         return;
1036     }
1037 
1038     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
1039         virtqueue_ordered_fill(vq, elem, len);
1040     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1041         virtqueue_packed_fill(vq, elem, len, idx);
1042     } else {
1043         virtqueue_split_fill(vq, elem, len, idx);
1044     }
1045 }
1046 
1047 /* Called within rcu_read_lock().  */
1048 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
1049 {
1050     uint16_t old, new;
1051 
1052     if (unlikely(!vq->vring.used)) {
1053         return;
1054     }
1055 
1056     /* Make sure buffer is written before we update index. */
1057     smp_wmb();
1058     trace_virtqueue_flush(vq, count);
1059     old = vq->used_idx;
1060     new = old + count;
1061     vring_used_idx_set(vq, new);
1062     vq->inuse -= count;
1063     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
1064         vq->signalled_used_valid = false;
1065 }
1066 
1067 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
1068 {
1069     unsigned int i, ndescs = 0;
1070 
1071     if (unlikely(!vq->vring.desc)) {
1072         return;
1073     }
1074 
1075     /*
1076      * For an indirect element, 'ndescs' is 1.
1077      * For all other elements, 'ndescs' is the number of descriptors
1078      * chained by NEXT (as set in virtqueue_packed_pop).
1079      * So when an 'elem' is written into the descriptor ring, its 'idx'
1080      * is the value of 'vq->used_idx' plus the 'ndescs' accumulated for
1081      * the elements written before it.
1082      */
1083     ndescs += vq->used_elems[0].ndescs;
1084     for (i = 1; i < count; i++) {
1085         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
1086         ndescs += vq->used_elems[i].ndescs;
1087     }
1088     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
1089 
1090     vq->inuse -= ndescs;
1091     vq->used_idx += ndescs;
1092     if (vq->used_idx >= vq->vring.num) {
1093         vq->used_idx -= vq->vring.num;
1094         vq->used_wrap_counter ^= 1;
1095         vq->signalled_used_valid = false;
1096     }
1097 }
1098 
1099 static void virtqueue_ordered_flush(VirtQueue *vq)
1100 {
1101     unsigned int i = vq->used_idx % vq->vring.num;
1102     unsigned int ndescs = 0;
1103     uint16_t old = vq->used_idx;
1104     uint16_t new;
1105     bool packed;
1106     VRingUsedElem uelem;
1107 
1108     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
1109 
1110     if (packed) {
1111         if (unlikely(!vq->vring.desc)) {
1112             return;
1113         }
1114     } else if (unlikely(!vq->vring.used)) {
1115         return;
1116     }
1117 
1118     /* First expected in-order element isn't ready, nothing to do */
1119     if (!vq->used_elems[i].in_order_filled) {
1120         return;
1121     }
1122 
1123     /* Search for filled elements in-order */
1124     while (vq->used_elems[i].in_order_filled) {
1125         /*
1126          * First entry for packed VQs is written last so the guest
1127          * doesn't see invalid descriptors.
1128          */
1129         if (packed && i != vq->used_idx) {
1130             virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
1131         } else if (!packed) {
1132             uelem.id = vq->used_elems[i].index;
1133             uelem.len = vq->used_elems[i].len;
1134             vring_used_write(vq, &uelem, i);
1135         }
1136 
1137         vq->used_elems[i].in_order_filled = false;
1138         ndescs += vq->used_elems[i].ndescs;
1139         i += vq->used_elems[i].ndescs;
1140         if (i >= vq->vring.num) {
1141             i -= vq->vring.num;
1142         }
1143     }
1144 
1145     if (packed) {
1146         virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true);
1147         vq->used_idx += ndescs;
1148         if (vq->used_idx >= vq->vring.num) {
1149             vq->used_idx -= vq->vring.num;
1150             vq->used_wrap_counter ^= 1;
1151             vq->signalled_used_valid = false;
1152         }
1153     } else {
1154         /* Make sure buffer is written before we update index. */
1155         smp_wmb();
1156         new = old + ndescs;
1157         vring_used_idx_set(vq, new);
1158         if (unlikely((int16_t)(new - vq->signalled_used) <
1159                      (uint16_t)(new - old))) {
1160             vq->signalled_used_valid = false;
1161         }
1162     }
1163     vq->inuse -= ndescs;
1164 }
1165 
1166 void virtqueue_flush(VirtQueue *vq, unsigned int count)
1167 {
1168     if (virtio_device_disabled(vq->vdev)) {
1169         vq->inuse -= count;
1170         return;
1171     }
1172 
1173     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
1174         virtqueue_ordered_flush(vq);
1175     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1176         virtqueue_packed_flush(vq, count);
1177     } else {
1178         virtqueue_split_flush(vq, count);
1179     }
1180 }
1181 
1182 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
1183                     unsigned int len)
1184 {
1185     RCU_READ_LOCK_GUARD();
1186     virtqueue_fill(vq, elem, len, 0);
1187     virtqueue_flush(vq, 1);
1188 }
1189 
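/*
 * Illustrative sketch (kept out of the build): virtqueue_push() above
 * completes a single element; a device completing a batch calls
 * virtqueue_fill() once per element and publishes them with a single
 * virtqueue_flush(), under the same RCU read lock.  'elems', 'lens' and 'n'
 * are placeholders for device state.
 */
#if 0
static void complete_batch(VirtQueue *vq, VirtQueueElement **elems,
                           const unsigned *lens, unsigned n)
{
    unsigned i;

    RCU_READ_LOCK_GUARD();
    for (i = 0; i < n; i++) {
        virtqueue_fill(vq, elems[i], lens[i], i);
    }
    virtqueue_flush(vq, n);
}
#endif
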
1190 /* Called within rcu_read_lock().  */
1191 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1192 {
1193     uint16_t avail_idx, num_heads;
1194 
1195     /* Use shadow index whenever possible. */
1196     avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
1197                                               : vring_avail_idx(vq);
1198     num_heads = avail_idx - idx;
1199 
1200     /* Check it isn't doing very strange things with descriptor numbers. */
1201     if (num_heads > vq->vring.num) {
1202         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1203                      idx, vq->shadow_avail_idx);
1204         return -EINVAL;
1205     }
1206     /*
1207      * On success, callers read a descriptor at vq->last_avail_idx.
1208      * Make sure descriptor read does not bypass avail index read.
1209      *
1210      * This is necessary even if we are using a shadow index, since
1211      * the shadow index could have been initialized by calling
1212      * vring_avail_idx() outside of this function, i.e., by a guest
1213      * memory read not accompanied by a barrier.
1214      */
1215     if (num_heads) {
1216         smp_rmb();
1217     }
1218 
1219     return num_heads;
1220 }
1221 
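/*
 * Illustrative sketch (kept out of the build): the ring indices are
 * free-running 16-bit counters, so the subtraction above is correct even
 * when avail->idx has wrapped around.
 */
#if 0
static void index_wrap_example(void)
{
    uint16_t avail_idx = 3;                             /* guest wrapped past 65535 */
    uint16_t last_avail_idx = 65534;
    uint16_t num_heads = avail_idx - last_avail_idx;    /* == 5 */

    assert(num_heads == 5);
}
#endif
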
1222 /* Called within rcu_read_lock().  */
1223 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1224                                unsigned int *head)
1225 {
1226     /* Grab the next descriptor number they're advertising, and increment
1227      * the index we've seen. */
1228     *head = vring_avail_ring(vq, idx % vq->vring.num);
1229 
1230     /* If their number is silly, that's a fatal mistake. */
1231     if (*head >= vq->vring.num) {
1232         virtio_error(vq->vdev, "Guest says index %u is available", *head);
1233         return false;
1234     }
1235 
1236     return true;
1237 }
1238 
1239 enum {
1240     VIRTQUEUE_READ_DESC_ERROR = -1,
1241     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1242     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1243 };
1244 
1245 /* Reads the 'desc->next' descriptor into '*desc'. */
1246 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1247                                           MemoryRegionCache *desc_cache,
1248                                           unsigned int max)
1249 {
1250     /* If this descriptor says it doesn't chain, we're done. */
1251     if (!(desc->flags & VRING_DESC_F_NEXT)) {
1252         return VIRTQUEUE_READ_DESC_DONE;
1253     }
1254 
1255     /* Check they're not leading us off the end of the descriptor table. */
1256     if (desc->next >= max) {
1257         virtio_error(vdev, "Desc next is %u", desc->next);
1258         return VIRTQUEUE_READ_DESC_ERROR;
1259     }
1260 
1261     vring_split_desc_read(vdev, desc, desc_cache, desc->next);
1262     return VIRTQUEUE_READ_DESC_MORE;
1263 }
1264 
1265 /* Called within rcu_read_lock().  */
1266 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1267                             unsigned int *in_bytes, unsigned int *out_bytes,
1268                             unsigned max_in_bytes, unsigned max_out_bytes,
1269                             VRingMemoryRegionCaches *caches)
1270 {
1271     VirtIODevice *vdev = vq->vdev;
1272     unsigned int idx;
1273     unsigned int total_bufs, in_total, out_total;
1274     MemoryRegionCache indirect_desc_cache;
1275     int64_t len = 0;
1276     int rc;
1277 
1278     address_space_cache_init_empty(&indirect_desc_cache);
1279 
1280     idx = vq->last_avail_idx;
1281     total_bufs = in_total = out_total = 0;
1282 
1283     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1284         MemoryRegionCache *desc_cache = &caches->desc;
1285         unsigned int num_bufs;
1286         VRingDesc desc;
1287         unsigned int i;
1288         unsigned int max = vq->vring.num;
1289 
1290         num_bufs = total_bufs;
1291 
1292         if (!virtqueue_get_head(vq, idx++, &i)) {
1293             goto err;
1294         }
1295 
1296         vring_split_desc_read(vdev, &desc, desc_cache, i);
1297 
1298         if (desc.flags & VRING_DESC_F_INDIRECT) {
1299             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1300                 virtio_error(vdev, "Invalid size for indirect buffer table");
1301                 goto err;
1302             }
1303 
1304             /* If we've got too many, that implies a descriptor loop. */
1305             if (num_bufs >= max) {
1306                 virtio_error(vdev, "Looped descriptor");
1307                 goto err;
1308             }
1309 
1310             /* loop over the indirect descriptor table */
1311             len = address_space_cache_init(&indirect_desc_cache,
1312                                            vdev->dma_as,
1313                                            desc.addr, desc.len, false);
1314             desc_cache = &indirect_desc_cache;
1315             if (len < desc.len) {
1316                 virtio_error(vdev, "Cannot map indirect buffer");
1317                 goto err;
1318             }
1319 
1320             max = desc.len / sizeof(VRingDesc);
1321             num_bufs = i = 0;
1322             vring_split_desc_read(vdev, &desc, desc_cache, i);
1323         }
1324 
1325         do {
1326             /* If we've got too many, that implies a descriptor loop. */
1327             if (++num_bufs > max) {
1328                 virtio_error(vdev, "Looped descriptor");
1329                 goto err;
1330             }
1331 
1332             if (desc.flags & VRING_DESC_F_WRITE) {
1333                 in_total += desc.len;
1334             } else {
1335                 out_total += desc.len;
1336             }
1337             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1338                 goto done;
1339             }
1340 
1341             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1342         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1343 
1344         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1345             goto err;
1346         }
1347 
1348         if (desc_cache == &indirect_desc_cache) {
1349             address_space_cache_destroy(&indirect_desc_cache);
1350             total_bufs++;
1351         } else {
1352             total_bufs = num_bufs;
1353         }
1354     }
1355 
1356     if (rc < 0) {
1357         goto err;
1358     }
1359 
1360 done:
1361     address_space_cache_destroy(&indirect_desc_cache);
1362     if (in_bytes) {
1363         *in_bytes = in_total;
1364     }
1365     if (out_bytes) {
1366         *out_bytes = out_total;
1367     }
1368     return;
1369 
1370 err:
1371     in_total = out_total = 0;
1372     goto done;
1373 }
1374 
1375 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1376                                            VRingPackedDesc *desc,
1377                                            MemoryRegionCache
1378                                            *desc_cache,
1379                                            unsigned int max,
1380                                            unsigned int *next,
1381                                            bool indirect)
1382 {
1383     /* If this descriptor says it doesn't chain, we're done. */
1384     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1385         return VIRTQUEUE_READ_DESC_DONE;
1386     }
1387 
1388     ++*next;
1389     if (*next == max) {
1390         if (indirect) {
1391             return VIRTQUEUE_READ_DESC_DONE;
1392         } else {
1393             (*next) -= vq->vring.num;
1394         }
1395     }
1396 
1397     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1398     return VIRTQUEUE_READ_DESC_MORE;
1399 }
1400 
1401 /* Called within rcu_read_lock().  */
1402 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1403                                              unsigned int *in_bytes,
1404                                              unsigned int *out_bytes,
1405                                              unsigned max_in_bytes,
1406                                              unsigned max_out_bytes,
1407                                              VRingMemoryRegionCaches *caches)
1408 {
1409     VirtIODevice *vdev = vq->vdev;
1410     unsigned int idx;
1411     unsigned int total_bufs, in_total, out_total;
1412     MemoryRegionCache indirect_desc_cache;
1413     MemoryRegionCache *desc_cache;
1414     int64_t len = 0;
1415     VRingPackedDesc desc;
1416     bool wrap_counter;
1417 
1418     address_space_cache_init_empty(&indirect_desc_cache);
1419 
1420     idx = vq->last_avail_idx;
1421     wrap_counter = vq->last_avail_wrap_counter;
1422     total_bufs = in_total = out_total = 0;
1423 
1424     for (;;) {
1425         unsigned int num_bufs = total_bufs;
1426         unsigned int i = idx;
1427         int rc;
1428         unsigned int max = vq->vring.num;
1429 
1430         desc_cache = &caches->desc;
1431 
1432         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1433         if (!is_desc_avail(desc.flags, wrap_counter)) {
1434             break;
1435         }
1436 
1437         if (desc.flags & VRING_DESC_F_INDIRECT) {
1438             if (desc.len % sizeof(VRingPackedDesc)) {
1439                 virtio_error(vdev, "Invalid size for indirect buffer table");
1440                 goto err;
1441             }
1442 
1443             /* If we've got too many, that implies a descriptor loop. */
1444             if (num_bufs >= max) {
1445                 virtio_error(vdev, "Looped descriptor");
1446                 goto err;
1447             }
1448 
1449             /* loop over the indirect descriptor table */
1450             len = address_space_cache_init(&indirect_desc_cache,
1451                                            vdev->dma_as,
1452                                            desc.addr, desc.len, false);
1453             desc_cache = &indirect_desc_cache;
1454             if (len < desc.len) {
1455                 virtio_error(vdev, "Cannot map indirect buffer");
1456                 goto err;
1457             }
1458 
1459             max = desc.len / sizeof(VRingPackedDesc);
1460             num_bufs = i = 0;
1461             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1462         }
1463 
1464         do {
1465             /* If we've got too many, that implies a descriptor loop. */
1466             if (++num_bufs > max) {
1467                 virtio_error(vdev, "Looped descriptor");
1468                 goto err;
1469             }
1470 
1471             if (desc.flags & VRING_DESC_F_WRITE) {
1472                 in_total += desc.len;
1473             } else {
1474                 out_total += desc.len;
1475             }
1476             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1477                 goto done;
1478             }
1479 
1480             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1481                                                  &i, desc_cache ==
1482                                                  &indirect_desc_cache);
1483         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1484 
1485         if (desc_cache == &indirect_desc_cache) {
1486             address_space_cache_destroy(&indirect_desc_cache);
1487             total_bufs++;
1488             idx++;
1489         } else {
1490             idx += num_bufs - total_bufs;
1491             total_bufs = num_bufs;
1492         }
1493 
1494         if (idx >= vq->vring.num) {
1495             idx -= vq->vring.num;
1496             wrap_counter ^= 1;
1497         }
1498     }
1499 
1500     /* Record the index and wrap counter for a kick we want */
1501     vq->shadow_avail_idx = idx;
1502     vq->shadow_avail_wrap_counter = wrap_counter;
1503 done:
1504     address_space_cache_destroy(&indirect_desc_cache);
1505     if (in_bytes) {
1506         *in_bytes = in_total;
1507     }
1508     if (out_bytes) {
1509         *out_bytes = out_total;
1510     }
1511     return;
1512 
1513 err:
1514     in_total = out_total = 0;
1515     goto done;
1516 }
1517 
1518 int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1519                               unsigned int *out_bytes, unsigned max_in_bytes,
1520                               unsigned max_out_bytes)
1521 {
1522     uint16_t desc_size;
1523     VRingMemoryRegionCaches *caches;
1524 
1525     RCU_READ_LOCK_GUARD();
1526 
1527     if (unlikely(!vq->vring.desc)) {
1528         goto err;
1529     }
1530 
1531     caches = vring_get_region_caches(vq);
1532     if (!caches) {
1533         goto err;
1534     }
1535 
1536     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1537                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1538     if (caches->desc.len < vq->vring.num * desc_size) {
1539         virtio_error(vq->vdev, "Cannot map descriptor ring");
1540         goto err;
1541     }
1542 
1543     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1544         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1545                                          max_in_bytes, max_out_bytes,
1546                                          caches);
1547     } else {
1548         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1549                                         max_in_bytes, max_out_bytes,
1550                                         caches);
1551     }
1552 
1553     return (int)vq->shadow_avail_idx;
1554 err:
1555     if (in_bytes) {
1556         *in_bytes = 0;
1557     }
1558     if (out_bytes) {
1559         *out_bytes = 0;
1560     }
1561 
1562     return -1;
1563 }
1564 
1565 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1566                           unsigned int out_bytes)
1567 {
1568     unsigned int in_total, out_total;
1569 
1570     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1571     return in_bytes <= in_total && out_bytes <= out_total;
1572 }
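
/*
 * A minimal usage sketch, assuming a hypothetical device that wants room for
 * a fixed-size header plus a payload in guest-writable buffers before it
 * starts a transfer.  virtqueue_avail_bytes() only answers "is at least this
 * much available right now"; it reserves nothing, so a later virtqueue_pop()
 * must still be checked for NULL.
 */
#define EXAMPLE_HDR_LEN 12   /* hypothetical device header size */

static bool example_rx_has_room(VirtQueue *vq, size_t payload_len)
{
    /* in_bytes = device-writable space, out_bytes = device-readable space */
    return virtqueue_avail_bytes(vq, EXAMPLE_HDR_LEN + payload_len, 0);
}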
1573 
1574 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1575                                hwaddr *addr, struct iovec *iov,
1576                                unsigned int max_num_sg, bool is_write,
1577                                hwaddr pa, size_t sz)
1578 {
1579     bool ok = false;
1580     unsigned num_sg = *p_num_sg;
1581     assert(num_sg <= max_num_sg);
1582 
1583     if (!sz) {
1584         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1585         goto out;
1586     }
1587 
1588     while (sz) {
1589         hwaddr len = sz;
1590 
1591         if (num_sg == max_num_sg) {
1592             virtio_error(vdev, "virtio: too many write descriptors in "
1593                                "indirect table");
1594             goto out;
1595         }
1596 
1597         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1598                                               is_write ?
1599                                               DMA_DIRECTION_FROM_DEVICE :
1600                                               DMA_DIRECTION_TO_DEVICE,
1601                                               MEMTXATTRS_UNSPECIFIED);
1602         if (!iov[num_sg].iov_base) {
1603             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1604             goto out;
1605         }
1606 
1607         iov[num_sg].iov_len = len;
1608         addr[num_sg] = pa;
1609 
1610         sz -= len;
1611         pa += len;
1612         num_sg++;
1613     }
1614     ok = true;
1615 
1616 out:
1617     *p_num_sg = num_sg;
1618     return ok;
1619 }
1620 
1621 /* Only used by error code paths before we have a VirtQueueElement (therefore
1622  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1623  * yet.
1624  */
1625 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1626                                     struct iovec *iov)
1627 {
1628     unsigned int i;
1629 
1630     for (i = 0; i < out_num + in_num; i++) {
1631         int is_write = i >= out_num;
1632 
1633         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1634         iov++;
1635     }
1636 }
1637 
1638 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1639                                 hwaddr *addr, unsigned int num_sg,
1640                                 bool is_write)
1641 {
1642     unsigned int i;
1643     hwaddr len;
1644 
1645     for (i = 0; i < num_sg; i++) {
1646         len = sg[i].iov_len;
1647         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1648                                         addr[i], &len, is_write ?
1649                                         DMA_DIRECTION_FROM_DEVICE :
1650                                         DMA_DIRECTION_TO_DEVICE,
1651                                         MEMTXATTRS_UNSPECIFIED);
1652         if (!sg[i].iov_base) {
1653             error_report("virtio: error trying to map MMIO memory");
1654             exit(1);
1655         }
1656         if (len != sg[i].iov_len) {
1657             error_report("virtio: unexpected memory split");
1658             exit(1);
1659         }
1660     }
1661 }
1662 
1663 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1664 {
1665     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1666     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1667                                                                         false);
1668 }
1669 
1670 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1671 {
1672     VirtQueueElement *elem;
1673     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1674     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1675     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1676     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1677     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1678     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1679 
1680     assert(sz >= sizeof(VirtQueueElement));
1681     elem = g_malloc(out_sg_end);
1682     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1683     elem->out_num = out_num;
1684     elem->in_num = in_num;
1685     elem->in_addr = (void *)elem + in_addr_ofs;
1686     elem->out_addr = (void *)elem + out_addr_ofs;
1687     elem->in_sg = (void *)elem + in_sg_ofs;
1688     elem->out_sg = (void *)elem + out_sg_ofs;
1689     return elem;
1690 }
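
/*
 * Sketch of the usual caller-side pattern behind the over-allocation done by
 * virtqueue_alloc_element(): a device embeds VirtQueueElement as the first
 * member of its own request struct (ExampleReq is hypothetical) and passes
 * sizeof() that struct as sz, so device bookkeeping and the element share one
 * allocation; the variable-length addr/sg arrays are laid out beyond sz.
 */
typedef struct ExampleReq {
    VirtQueueElement elem;      /* must be the first member */
    uint32_t status;            /* device-specific state */
} ExampleReq;

static ExampleReq *example_req_pop(VirtQueue *vq)
{
    /* Freed with g_free() once the request has been pushed back. */
    return virtqueue_pop(vq, sizeof(ExampleReq));
}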
1691 
1692 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1693 {
1694     unsigned int i, head, max, idx;
1695     VRingMemoryRegionCaches *caches;
1696     MemoryRegionCache indirect_desc_cache;
1697     MemoryRegionCache *desc_cache;
1698     int64_t len;
1699     VirtIODevice *vdev = vq->vdev;
1700     VirtQueueElement *elem = NULL;
1701     unsigned out_num, in_num, elem_entries;
1702     hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
1703     struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
1704     VRingDesc desc;
1705     int rc;
1706 
1707     address_space_cache_init_empty(&indirect_desc_cache);
1708 
1709     RCU_READ_LOCK_GUARD();
1710     if (virtio_queue_empty_rcu(vq)) {
1711         goto done;
1712     }
1713     /* Needed after virtio_queue_empty(), see comment in
1714      * virtqueue_num_heads(). */
1715     smp_rmb();
1716 
1717     /* When we start there are none of either input nor output. */
1718     out_num = in_num = elem_entries = 0;
1719 
1720     max = vq->vring.num;
1721 
1722     if (vq->inuse >= vq->vring.num) {
1723         virtio_error(vdev, "Virtqueue size exceeded");
1724         goto done;
1725     }
1726 
1727     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1728         goto done;
1729     }
1730 
1731     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1732         vring_set_avail_event(vq, vq->last_avail_idx);
1733     }
1734 
1735     i = head;
1736 
1737     caches = vring_get_region_caches(vq);
1738     if (!caches) {
1739         virtio_error(vdev, "Region caches not initialized");
1740         goto done;
1741     }
1742 
1743     if (caches->desc.len < max * sizeof(VRingDesc)) {
1744         virtio_error(vdev, "Cannot map descriptor ring");
1745         goto done;
1746     }
1747 
1748     desc_cache = &caches->desc;
1749     vring_split_desc_read(vdev, &desc, desc_cache, i);
1750     if (desc.flags & VRING_DESC_F_INDIRECT) {
1751         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1752             virtio_error(vdev, "Invalid size for indirect buffer table");
1753             goto done;
1754         }
1755         virtio_check_indirect_feature(vdev);
1756 
1757         /* loop over the indirect descriptor table */
1758         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1759                                        desc.addr, desc.len, false);
1760         desc_cache = &indirect_desc_cache;
1761         if (len < desc.len) {
1762             virtio_error(vdev, "Cannot map indirect buffer");
1763             goto done;
1764         }
1765 
1766         max = desc.len / sizeof(VRingDesc);
1767         i = 0;
1768         vring_split_desc_read(vdev, &desc, desc_cache, i);
1769     }
1770 
1771     /* Collect all the descriptors */
1772     do {
1773         bool map_ok;
1774 
1775         if (desc.flags & VRING_DESC_F_WRITE) {
1776             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1777                                         iov + out_num,
1778                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1779                                         desc.addr, desc.len);
1780         } else {
1781             if (in_num) {
1782                 virtio_error(vdev, "Incorrect order for descriptors");
1783                 goto err_undo_map;
1784             }
1785             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1786                                         VIRTQUEUE_MAX_SIZE, false,
1787                                         desc.addr, desc.len);
1788         }
1789         if (!map_ok) {
1790             goto err_undo_map;
1791         }
1792 
1793         /* If we've got too many, that implies a descriptor loop. */
1794         if (++elem_entries > max) {
1795             virtio_error(vdev, "Looped descriptor");
1796             goto err_undo_map;
1797         }
1798 
1799         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1800     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1801 
1802     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1803         goto err_undo_map;
1804     }
1805 
1806     /* Now copy what we have collected and mapped */
1807     elem = virtqueue_alloc_element(sz, out_num, in_num);
1808     elem->index = head;
1809     elem->ndescs = 1;
1810     for (i = 0; i < out_num; i++) {
1811         elem->out_addr[i] = addr[i];
1812         elem->out_sg[i] = iov[i];
1813     }
1814     for (i = 0; i < in_num; i++) {
1815         elem->in_addr[i] = addr[out_num + i];
1816         elem->in_sg[i] = iov[out_num + i];
1817     }
1818 
1819     if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
1820         idx = (vq->last_avail_idx - 1) % vq->vring.num;
1821         vq->used_elems[idx].index = elem->index;
1822         vq->used_elems[idx].len = elem->len;
1823         vq->used_elems[idx].ndescs = elem->ndescs;
1824     }
1825 
1826     vq->inuse++;
1827 
1828     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1829 done:
1830     address_space_cache_destroy(&indirect_desc_cache);
1831 
1832     return elem;
1833 
1834 err_undo_map:
1835     virtqueue_undo_map_desc(out_num, in_num, iov);
1836     goto done;
1837 }
1838 
1839 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1840 {
1841     unsigned int i, max;
1842     VRingMemoryRegionCaches *caches;
1843     MemoryRegionCache indirect_desc_cache;
1844     MemoryRegionCache *desc_cache;
1845     int64_t len;
1846     VirtIODevice *vdev = vq->vdev;
1847     VirtQueueElement *elem = NULL;
1848     unsigned out_num, in_num, elem_entries;
1849     hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
1850     struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
1851     VRingPackedDesc desc;
1852     uint16_t id;
1853     int rc;
1854 
1855     address_space_cache_init_empty(&indirect_desc_cache);
1856 
1857     RCU_READ_LOCK_GUARD();
1858     if (virtio_queue_packed_empty_rcu(vq)) {
1859         goto done;
1860     }
1861 
1862     /* When we start there are none of either input nor output. */
1863     out_num = in_num = elem_entries = 0;
1864 
1865     max = vq->vring.num;
1866 
1867     if (vq->inuse >= vq->vring.num) {
1868         virtio_error(vdev, "Virtqueue size exceeded");
1869         goto done;
1870     }
1871 
1872     i = vq->last_avail_idx;
1873 
1874     caches = vring_get_region_caches(vq);
1875     if (!caches) {
1876         virtio_error(vdev, "Region caches not initialized");
1877         goto done;
1878     }
1879 
1880     if (caches->desc.len < max * sizeof(VRingDesc)) {
1881         virtio_error(vdev, "Cannot map descriptor ring");
1882         goto done;
1883     }
1884 
1885     desc_cache = &caches->desc;
1886     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1887     id = desc.id;
1888     if (desc.flags & VRING_DESC_F_INDIRECT) {
1889         if (desc.len % sizeof(VRingPackedDesc)) {
1890             virtio_error(vdev, "Invalid size for indirect buffer table");
1891             goto done;
1892         }
1893         virtio_check_indirect_feature(vdev);
1894 
1895         /* loop over the indirect descriptor table */
1896         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1897                                        desc.addr, desc.len, false);
1898         desc_cache = &indirect_desc_cache;
1899         if (len < desc.len) {
1900             virtio_error(vdev, "Cannot map indirect buffer");
1901             goto done;
1902         }
1903 
1904         max = desc.len / sizeof(VRingPackedDesc);
1905         i = 0;
1906         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1907     }
1908 
1909     /* Collect all the descriptors */
1910     do {
1911         bool map_ok;
1912 
1913         if (desc.flags & VRING_DESC_F_WRITE) {
1914             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1915                                         iov + out_num,
1916                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1917                                         desc.addr, desc.len);
1918         } else {
1919             if (in_num) {
1920                 virtio_error(vdev, "Incorrect order for descriptors");
1921                 goto err_undo_map;
1922             }
1923             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1924                                         VIRTQUEUE_MAX_SIZE, false,
1925                                         desc.addr, desc.len);
1926         }
1927         if (!map_ok) {
1928             goto err_undo_map;
1929         }
1930 
1931         /* If we've got too many, that implies a descriptor loop. */
1932         if (++elem_entries > max) {
1933             virtio_error(vdev, "Looped descriptor");
1934             goto err_undo_map;
1935         }
1936 
1937         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1938                                              desc_cache ==
1939                                              &indirect_desc_cache);
1940     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1941 
1942     if (desc_cache != &indirect_desc_cache) {
1943         /* Buffer ID is included in the last descriptor in the list. */
1944         id = desc.id;
1945     }
1946 
1947     /* Now copy what we have collected and mapped */
1948     elem = virtqueue_alloc_element(sz, out_num, in_num);
1949     for (i = 0; i < out_num; i++) {
1950         elem->out_addr[i] = addr[i];
1951         elem->out_sg[i] = iov[i];
1952     }
1953     for (i = 0; i < in_num; i++) {
1954         elem->in_addr[i] = addr[out_num + i];
1955         elem->in_sg[i] = iov[out_num + i];
1956     }
1957 
1958     elem->index = id;
1959     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1960 
1961     if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
1962         vq->used_elems[vq->last_avail_idx].index = elem->index;
1963         vq->used_elems[vq->last_avail_idx].len = elem->len;
1964         vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs;
1965     }
1966 
1967     vq->last_avail_idx += elem->ndescs;
1968     vq->inuse += elem->ndescs;
1969 
1970     if (vq->last_avail_idx >= vq->vring.num) {
1971         vq->last_avail_idx -= vq->vring.num;
1972         vq->last_avail_wrap_counter ^= 1;
1973     }
1974 
1975     vq->shadow_avail_idx = vq->last_avail_idx;
1976     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1977 
1978     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1979 done:
1980     address_space_cache_destroy(&indirect_desc_cache);
1981 
1982     return elem;
1983 
1984 err_undo_map:
1985     virtqueue_undo_map_desc(out_num, in_num, iov);
1986     goto done;
1987 }
1988 
1989 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1990 {
1991     if (virtio_device_disabled(vq->vdev)) {
1992         return NULL;
1993     }
1994 
1995     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1996         return virtqueue_packed_pop(vq, sz);
1997     } else {
1998         return virtqueue_split_pop(vq, sz);
1999     }
2000 }
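
/*
 * A minimal sketch of the pop/push cycle from a device's point of view,
 * assuming a hypothetical request/response queue and qemu/iov.h for
 * iov_from_buf(): pop until the ring is empty, complete each element with
 * the number of bytes written back, then raise one interrupt for the batch.
 */
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;
    uint8_t status = 0;                 /* hypothetical completion code */
    size_t written;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;                      /* queue empty (or device broken) */
        }
        /* Request data would be read from elem->out_sg/out_num here. */
        written = iov_from_buf(elem->in_sg, elem->in_num, 0,
                               &status, sizeof(status));
        virtqueue_push(vq, elem, written);
        g_free(elem);
    }
    virtio_notify(vdev, vq);
}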
2001 
2002 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
2003 {
2004     VRingMemoryRegionCaches *caches;
2005     MemoryRegionCache *desc_cache;
2006     unsigned int dropped = 0;
2007     VirtQueueElement elem = {};
2008     VirtIODevice *vdev = vq->vdev;
2009     VRingPackedDesc desc;
2010 
2011     RCU_READ_LOCK_GUARD();
2012 
2013     caches = vring_get_region_caches(vq);
2014     if (!caches) {
2015         return 0;
2016     }
2017 
2018     desc_cache = &caches->desc;
2019 
2020     virtio_queue_set_notification(vq, 0);
2021 
2022     while (vq->inuse < vq->vring.num) {
2023         unsigned int idx = vq->last_avail_idx;
2024         /*
2025          * works similar to virtqueue_pop but does not map buffers
2026          * and does not allocate any memory.
2027          */
2028         vring_packed_desc_read(vdev, &desc, desc_cache,
2029                                vq->last_avail_idx, true);
2030         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
2031             break;
2032         }
2033         elem.index = desc.id;
2034         elem.ndescs = 1;
2035         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
2036                                                vq->vring.num, &idx, false)) {
2037             ++elem.ndescs;
2038         }
2039         /*
2040          * immediately push the element, nothing to unmap
2041          * as both in_num and out_num are set to 0.
2042          */
2043         virtqueue_push(vq, &elem, 0);
2044         dropped++;
2045         vq->last_avail_idx += elem.ndescs;
2046         if (vq->last_avail_idx >= vq->vring.num) {
2047             vq->last_avail_idx -= vq->vring.num;
2048             vq->last_avail_wrap_counter ^= 1;
2049         }
2050     }
2051 
2052     return dropped;
2053 }
2054 
2055 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
2056 {
2057     unsigned int dropped = 0;
2058     VirtQueueElement elem = {};
2059     VirtIODevice *vdev = vq->vdev;
2060     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2061 
2062     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
2063         /* works similar to virtqueue_pop but does not map buffers
2064         * and does not allocate any memory */
2065         smp_rmb();
2066         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
2067             break;
2068         }
2069         vq->inuse++;
2070         vq->last_avail_idx++;
2071         if (fEventIdx) {
2072             vring_set_avail_event(vq, vq->last_avail_idx);
2073         }
2074         /* immediately push the element, nothing to unmap
2075          * as both in_num and out_num are set to 0 */
2076         virtqueue_push(vq, &elem, 0);
2077         dropped++;
2078     }
2079 
2080     return dropped;
2081 }
2082 
2083 /* virtqueue_drop_all:
2084  * @vq: The #VirtQueue
2085  * Drops all queued buffers and indicates them to the guest
2086  * as if they are done. Useful when buffers can not be
2087  * as if they are done. Useful when buffers cannot be
2088  */
2089 unsigned int virtqueue_drop_all(VirtQueue *vq)
2090 {
2091     struct VirtIODevice *vdev = vq->vdev;
2092 
2093     if (virtio_device_disabled(vq->vdev)) {
2094         return 0;
2095     }
2096 
2097     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2098         return virtqueue_packed_drop_all(vq);
2099     } else {
2100         return virtqueue_split_drop_all(vq);
2101     }
2102 }
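
/*
 * Sketch of a typical caller, assuming a hypothetical device whose backend
 * has gone away: return every pending buffer to the guest as "done" with a
 * zero used length, then signal the guest once.
 */
static void example_backend_gone(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq)) {
        virtio_notify(vdev, vq);
    }
}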
2103 
2104 /* Reading and writing a structure directly to QEMUFile is *awful*, but
2105  * it is what QEMU has always done by mistake.  We can change it sooner
2106  * or later by bumping the version number of the affected vm states.
2107  * In the meanwhile, since the in-memory layout of VirtQueueElement
2108  * has changed, we need to marshal to and from the layout that was
2109  * used before the change.
2110  */
2111 typedef struct VirtQueueElementOld {
2112     unsigned int index;
2113     unsigned int out_num;
2114     unsigned int in_num;
2115     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
2116     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
2117     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
2118     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
2119 } VirtQueueElementOld;
2120 
2121 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
2122 {
2123     VirtQueueElement *elem;
2124     VirtQueueElementOld data;
2125     int i;
2126 
2127     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2128 
2129     /* TODO: teach all callers that this can fail, and return failure instead
2130      * of asserting here.
2131      * This is just one thing (there are probably more) that must be
2132      * fixed before we can allow NDEBUG compilation.
2133      */
2134     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
2135     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
2136 
2137     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
2138     elem->index = data.index;
2139 
2140     for (i = 0; i < elem->in_num; i++) {
2141         elem->in_addr[i] = data.in_addr[i];
2142     }
2143 
2144     for (i = 0; i < elem->out_num; i++) {
2145         elem->out_addr[i] = data.out_addr[i];
2146     }
2147 
2148     for (i = 0; i < elem->in_num; i++) {
2149         /* Base is overwritten by virtqueue_map.  */
2150         elem->in_sg[i].iov_base = 0;
2151         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
2152     }
2153 
2154     for (i = 0; i < elem->out_num; i++) {
2155         /* Base is overwritten by virtqueue_map.  */
2156         elem->out_sg[i].iov_base = 0;
2157         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
2158     }
2159 
2160     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2161         qemu_get_be32s(f, &elem->ndescs);
2162     }
2163 
2164     virtqueue_map(vdev, elem);
2165     return elem;
2166 }
2167 
2168 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
2169                                 VirtQueueElement *elem)
2170 {
2171     VirtQueueElementOld data;
2172     int i;
2173 
2174     memset(&data, 0, sizeof(data));
2175     data.index = elem->index;
2176     data.in_num = elem->in_num;
2177     data.out_num = elem->out_num;
2178 
2179     for (i = 0; i < elem->in_num; i++) {
2180         data.in_addr[i] = elem->in_addr[i];
2181     }
2182 
2183     for (i = 0; i < elem->out_num; i++) {
2184         data.out_addr[i] = elem->out_addr[i];
2185     }
2186 
2187     for (i = 0; i < elem->in_num; i++) {
2188         /* Base is overwritten by virtqueue_map when loading.  Do not
2189          * save it, as it would leak the QEMU address space layout.  */
2190         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
2191     }
2192 
2193     for (i = 0; i < elem->out_num; i++) {
2194         /* Do not save iov_base as above.  */
2195         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
2196     }
2197 
2198     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2199         qemu_put_be32s(f, &elem->ndescs);
2200     }
2201 
2202     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2203 }
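
/*
 * Sketch of how a device might migrate one in-flight element with the two
 * helpers above (the presence byte and function names are hypothetical);
 * sz on the load side must match what the device passes to virtqueue_pop().
 */
static void example_save_inflight(VirtIODevice *vdev, QEMUFile *f,
                                  VirtQueueElement *elem)
{
    qemu_put_byte(f, elem != NULL);
    if (elem) {
        qemu_put_virtqueue_element(vdev, f, elem);
    }
}

static VirtQueueElement *example_load_inflight(VirtIODevice *vdev, QEMUFile *f)
{
    if (!qemu_get_byte(f)) {
        return NULL;
    }
    return qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
}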
2204 
2205 /* virtio device */
2206 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
2207 {
2208     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2209     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2210 
2211     if (virtio_device_disabled(vdev)) {
2212         return;
2213     }
2214 
2215     if (k->notify) {
2216         k->notify(qbus->parent, vector);
2217     }
2218 }
2219 
2220 void virtio_update_irq(VirtIODevice *vdev)
2221 {
2222     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2223 }
2224 
2225 static int virtio_validate_features(VirtIODevice *vdev)
2226 {
2227     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2228 
2229     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2230         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2231         return -EFAULT;
2232     }
2233 
2234     if (k->validate_features) {
2235         return k->validate_features(vdev);
2236     } else {
2237         return 0;
2238     }
2239 }
2240 
2241 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2242 {
2243     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2244     trace_virtio_set_status(vdev, val);
2245     int ret = 0;
2246 
2247     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2248         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2249             val & VIRTIO_CONFIG_S_FEATURES_OK) {
2250             ret = virtio_validate_features(vdev);
2251             if (ret) {
2252                 return ret;
2253             }
2254         }
2255     }
2256 
2257     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2258         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2259         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2260     }
2261 
2262     if (k->set_status) {
2263         ret = k->set_status(vdev, val);
2264         if (ret) {
2265             qemu_log("set %s status to %d failed, old status: %d\n",
2266                      vdev->name, val, vdev->status);
2267         }
2268     }
2269     vdev->status = val;
2270 
2271     return ret;
2272 }
2273 
2274 static enum virtio_device_endian virtio_default_endian(void)
2275 {
2276     if (target_big_endian()) {
2277         return VIRTIO_DEVICE_ENDIAN_BIG;
2278     } else {
2279         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2280     }
2281 }
2282 
2283 static enum virtio_device_endian virtio_current_cpu_endian(void)
2284 {
2285     if (cpu_virtio_is_big_endian(current_cpu)) {
2286         return VIRTIO_DEVICE_ENDIAN_BIG;
2287     } else {
2288         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2289     }
2290 }
2291 
2292 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2293 {
2294     vdev->vq[i].vring.desc = 0;
2295     vdev->vq[i].vring.avail = 0;
2296     vdev->vq[i].vring.used = 0;
2297     vdev->vq[i].last_avail_idx = 0;
2298     vdev->vq[i].shadow_avail_idx = 0;
2299     vdev->vq[i].used_idx = 0;
2300     vdev->vq[i].last_avail_wrap_counter = true;
2301     vdev->vq[i].shadow_avail_wrap_counter = true;
2302     vdev->vq[i].used_wrap_counter = true;
2303     virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2304     vdev->vq[i].signalled_used = 0;
2305     vdev->vq[i].signalled_used_valid = false;
2306     vdev->vq[i].notification = true;
2307     vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2308     vdev->vq[i].inuse = 0;
2309     virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2310 }
2311 
2312 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2313 {
2314     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2315 
2316     if (k->queue_reset) {
2317         k->queue_reset(vdev, queue_index);
2318     }
2319 
2320     __virtio_queue_reset(vdev, queue_index);
2321 }
2322 
2323 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2324 {
2325     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2326 
2327     /*
2328      * TODO: Seabios is currently out of spec and triggering this error.
2329      * So this needs to be fixed in Seabios, then this can
2330      * be re-enabled for new machine types only, and also after
2331      * being converted to LOG_GUEST_ERROR.
2332      *
2333     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2334         error_report("queue_enable is only supported in devices of virtio "
2335                      "1.0 or later.");
2336     }
2337     */
2338 
2339     if (k->queue_enable) {
2340         k->queue_enable(vdev, queue_index);
2341     }
2342 }
2343 
2344 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2345 {
2346     if (!vdev->vq[n].vring.num) {
2347         return;
2348     }
2349     vdev->vq[n].vring.desc = addr;
2350     virtio_queue_update_rings(vdev, n);
2351 }
2352 
2353 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2354 {
2355     return vdev->vq[n].vring.desc;
2356 }
2357 
2358 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2359                             hwaddr avail, hwaddr used)
2360 {
2361     if (!vdev->vq[n].vring.num) {
2362         return;
2363     }
2364     vdev->vq[n].vring.desc = desc;
2365     vdev->vq[n].vring.avail = avail;
2366     vdev->vq[n].vring.used = used;
2367     virtio_init_region_cache(vdev, n);
2368 }
2369 
2370 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2371 {
2372     /* Don't allow guest to flip queue between existent and
2373      * nonexistent states, or to set it to an invalid size.
2374      */
2375     if (!!num != !!vdev->vq[n].vring.num ||
2376         num > VIRTQUEUE_MAX_SIZE ||
2377         num < 0) {
2378         return;
2379     }
2380     vdev->vq[n].vring.num = num;
2381 }
2382 
2383 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2384 {
2385     return QLIST_FIRST(&vdev->vector_queues[vector]);
2386 }
2387 
2388 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2389 {
2390     return QLIST_NEXT(vq, node);
2391 }
2392 
2393 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2394 {
2395     return vdev->vq[n].vring.num;
2396 }
2397 
2398 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2399 {
2400     return vdev->vq[n].vring.num_default;
2401 }
2402 
2403 int virtio_get_num_queues(VirtIODevice *vdev)
2404 {
2405     int i;
2406 
2407     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2408         if (!virtio_queue_get_num(vdev, i)) {
2409             break;
2410         }
2411     }
2412 
2413     return i;
2414 }
2415 
2416 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2417 {
2418     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2419     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2420 
2421     /* virtio-1 compliant devices cannot change the alignment */
2422     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2423         error_report("tried to modify queue alignment for virtio-1 device");
2424         return;
2425     }
2426     /* Check that the transport told us it was going to do this
2427      * (so a buggy transport will immediately assert rather than
2428      * silently failing to migrate this state)
2429      */
2430     assert(k->has_variable_vring_alignment);
2431 
2432     if (align) {
2433         vdev->vq[n].vring.align = align;
2434         virtio_queue_update_rings(vdev, n);
2435     }
2436 }
2437 
2438 void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t shadow_avail_idx)
2439 {
2440     if (!vq->vring.desc) {
2441         return;
2442     }
2443 
2444     /*
2445      * 16-bit data for packed VQs include 1-bit wrap counter and
2446      * 15-bit shadow_avail_idx.
2447      */
2448     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
2449         vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1;
2450         vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF;
2451     } else {
2452         vq->shadow_avail_idx = shadow_avail_idx;
2453     }
2454 }
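
/*
 * Counterpart sketch of the encoding decoded above: a caller (for example a
 * vhost backend reporting its last seen avail index for a packed queue)
 * folds the wrap counter into bit 15 and keeps the ring index in bits 0-14.
 */
static uint16_t example_pack_shadow_avail_idx(uint16_t idx, bool wrap_counter)
{
    return (idx & 0x7FFF) | (uint16_t)(wrap_counter ? 0x8000 : 0);
}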
2455 
2456 static void virtio_queue_notify_vq(VirtQueue *vq)
2457 {
2458     if (vq->vring.desc && vq->handle_output) {
2459         VirtIODevice *vdev = vq->vdev;
2460 
2461         if (unlikely(vdev->broken)) {
2462             return;
2463         }
2464 
2465         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2466         vq->handle_output(vdev, vq);
2467 
2468         if (unlikely(vdev->start_on_kick)) {
2469             virtio_set_started(vdev, true);
2470         }
2471     }
2472 }
2473 
2474 void virtio_queue_notify(VirtIODevice *vdev, int n)
2475 {
2476     VirtQueue *vq = &vdev->vq[n];
2477 
2478     if (unlikely(!vq->vring.desc || vdev->broken)) {
2479         return;
2480     }
2481 
2482     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2483     if (vq->host_notifier_enabled) {
2484         event_notifier_set(&vq->host_notifier);
2485     } else if (vq->handle_output) {
2486         vq->handle_output(vdev, vq);
2487 
2488         if (unlikely(vdev->start_on_kick)) {
2489             virtio_set_started(vdev, true);
2490         }
2491     }
2492 }
2493 
2494 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2495 {
2496     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2497         VIRTIO_NO_VECTOR;
2498 }
2499 
2500 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2501 {
2502     VirtQueue *vq = &vdev->vq[n];
2503 
2504     if (n < VIRTIO_QUEUE_MAX) {
2505         if (vdev->vector_queues &&
2506             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2507             QLIST_REMOVE(vq, node);
2508         }
2509         vdev->vq[n].vector = vector;
2510         if (vdev->vector_queues &&
2511             vector != VIRTIO_NO_VECTOR) {
2512             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2513         }
2514     }
2515 }
2516 
2517 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2518                             VirtIOHandleOutput handle_output)
2519 {
2520     int i;
2521 
2522     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2523         if (vdev->vq[i].vring.num == 0)
2524             break;
2525     }
2526 
2527     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2528         abort();
2529 
2530     vdev->vq[i].vring.num = queue_size;
2531     vdev->vq[i].vring.num_default = queue_size;
2532     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2533     vdev->vq[i].handle_output = handle_output;
2534     vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2535 
2536     return &vdev->vq[i];
2537 }
2538 
2539 void virtio_delete_queue(VirtQueue *vq)
2540 {
2541     vq->vring.num = 0;
2542     vq->vring.num_default = 0;
2543     vq->handle_output = NULL;
2544     g_free(vq->used_elems);
2545     vq->used_elems = NULL;
2546     virtio_virtqueue_reset_region_cache(vq);
2547 }
2548 
2549 void virtio_del_queue(VirtIODevice *vdev, int n)
2550 {
2551     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2552         abort();
2553     }
2554 
2555     virtio_delete_queue(&vdev->vq[n]);
2556 }
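
/*
 * A minimal setup/teardown sketch, assuming a hypothetical device whose
 * realize path has already called virtio_init(): handle_output is invoked
 * when the guest kicks the queue (unless a host notifier takes over), and
 * every virtio_add_queue() needs a matching virtio_del_queue() (or
 * virtio_delete_queue()) on unrealize.
 */
static void example_init_queues(VirtIODevice *vdev,
                                VirtIOHandleOutput handle_output)
{
    /* Queue size 128 is an arbitrary example; devices pick their own. */
    VirtQueue *vq = virtio_add_queue(vdev, 128, handle_output);
    (void)vq;   /* normally stored in the device state */
}

static void example_fini_queues(VirtIODevice *vdev)
{
    virtio_del_queue(vdev, 0);
}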
2557 
2558 static void virtio_set_isr(VirtIODevice *vdev, int value)
2559 {
2560     uint8_t old = qatomic_read(&vdev->isr);
2561 
2562     /* Do not write ISR if it does not change, so that its cacheline remains
2563      * shared in the common case where the guest does not read it.
2564      */
2565     if ((old & value) != value) {
2566         qatomic_or(&vdev->isr, value);
2567     }
2568 }
2569 
2570 /* Called within rcu_read_lock(). */
2571 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2572 {
2573     uint16_t old, new;
2574     bool v;
2575     /* We need to expose used array entries before checking used event. */
2576     smp_mb();
2577     /* Always notify when the queue is empty, if the feature was acknowledged */
2578     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2579         !vq->inuse && virtio_queue_empty(vq)) {
2580         return true;
2581     }
2582 
2583     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2584         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2585     }
2586 
2587     v = vq->signalled_used_valid;
2588     vq->signalled_used_valid = true;
2589     old = vq->signalled_used;
2590     new = vq->signalled_used = vq->used_idx;
2591     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2592 }
2593 
2594 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2595                                     uint16_t off_wrap, uint16_t new,
2596                                     uint16_t old)
2597 {
2598     int off = off_wrap & ~(1 << 15);
2599 
2600     if (wrap != off_wrap >> 15) {
2601         off -= vq->vring.num;
2602     }
2603 
2604     return vring_need_event(off, new, old);
2605 }
2606 
2607 /* Called within rcu_read_lock(). */
2608 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2609 {
2610     VRingPackedDescEvent e;
2611     uint16_t old, new;
2612     bool v;
2613     VRingMemoryRegionCaches *caches;
2614 
2615     caches = vring_get_region_caches(vq);
2616     if (!caches) {
2617         return false;
2618     }
2619 
2620     vring_packed_event_read(vdev, &caches->avail, &e);
2621 
2622     old = vq->signalled_used;
2623     new = vq->signalled_used = vq->used_idx;
2624     v = vq->signalled_used_valid;
2625     vq->signalled_used_valid = true;
2626 
2627     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2628         return false;
2629     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2630         return true;
2631     }
2632 
2633     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2634                                          e.off_wrap, new, old);
2635 }
2636 
2637 /* Called within rcu_read_lock().  */
2638 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2639 {
2640     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2641         return virtio_packed_should_notify(vdev, vq);
2642     } else {
2643         return virtio_split_should_notify(vdev, vq);
2644     }
2645 }
2646 
2647 /* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
2648 static void virtio_notify_irqfd_deferred_fn(void *opaque)
2649 {
2650     EventNotifier *notifier = opaque;
2651     VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
2652 
2653     trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
2654     event_notifier_set(notifier);
2655 }
2656 
2657 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2658 {
2659     WITH_RCU_READ_LOCK_GUARD() {
2660         if (!virtio_should_notify(vdev, vq)) {
2661             return;
2662         }
2663     }
2664 
2665     trace_virtio_notify_irqfd(vdev, vq);
2666 
2667     /*
2668      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2669      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2670      * incorrectly polling this bit during crashdump and hibernation
2671      * in MSI mode, causing a hang if this bit is never updated.
2672      * Recent releases of Windows do not really shut down, but rather
2673      * log out and hibernate to make the next startup faster.  Hence,
2674      * this manifested as a more serious hang during shutdown with
2675      *
2676      * MSI enabled.
2677      * is not a must, but it's easy to do so let's do it here.
2678      *
2679      * Note: it's safe to update ISR from any thread as it was switched
2680      * to an atomic operation.
2681      */
2682     virtio_set_isr(vq->vdev, 0x1);
2683     defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
2684 }
2685 
2686 static void virtio_irq(VirtQueue *vq)
2687 {
2688     virtio_set_isr(vq->vdev, 0x1);
2689     virtio_notify_vector(vq->vdev, vq->vector);
2690 }
2691 
2692 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2693 {
2694     WITH_RCU_READ_LOCK_GUARD() {
2695         if (!virtio_should_notify(vdev, vq)) {
2696             return;
2697         }
2698     }
2699 
2700     trace_virtio_notify(vdev, vq);
2701     virtio_irq(vq);
2702 }
2703 
2704 void virtio_notify_config(VirtIODevice *vdev)
2705 {
2706     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2707         return;
2708 
2709     virtio_set_isr(vdev, 0x3);
2710     vdev->generation++;
2711     virtio_notify_vector(vdev, vdev->config_vector);
2712 }
2713 
2714 static bool virtio_device_endian_needed(void *opaque)
2715 {
2716     VirtIODevice *vdev = opaque;
2717 
2718     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2719     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2720         return vdev->device_endian != virtio_default_endian();
2721     }
2722     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2723     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2724 }
2725 
2726 static bool virtio_64bit_features_needed(void *opaque)
2727 {
2728     VirtIODevice *vdev = opaque;
2729 
2730     return (vdev->host_features >> 32) != 0;
2731 }
2732 
2733 static bool virtio_virtqueue_needed(void *opaque)
2734 {
2735     VirtIODevice *vdev = opaque;
2736 
2737     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2738 }
2739 
2740 static bool virtio_packed_virtqueue_needed(void *opaque)
2741 {
2742     VirtIODevice *vdev = opaque;
2743 
2744     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2745 }
2746 
2747 static bool virtio_ringsize_needed(void *opaque)
2748 {
2749     VirtIODevice *vdev = opaque;
2750     int i;
2751 
2752     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2753         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2754             return true;
2755         }
2756     }
2757     return false;
2758 }
2759 
2760 static bool virtio_extra_state_needed(void *opaque)
2761 {
2762     VirtIODevice *vdev = opaque;
2763     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2764     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2765 
2766     return k->has_extra_state &&
2767         k->has_extra_state(qbus->parent);
2768 }
2769 
2770 static bool virtio_broken_needed(void *opaque)
2771 {
2772     VirtIODevice *vdev = opaque;
2773 
2774     return vdev->broken;
2775 }
2776 
2777 static bool virtio_started_needed(void *opaque)
2778 {
2779     VirtIODevice *vdev = opaque;
2780 
2781     return vdev->started;
2782 }
2783 
2784 static bool virtio_disabled_needed(void *opaque)
2785 {
2786     VirtIODevice *vdev = opaque;
2787 
2788     return vdev->disabled;
2789 }
2790 
2791 static const VMStateDescription vmstate_virtqueue = {
2792     .name = "virtqueue_state",
2793     .version_id = 1,
2794     .minimum_version_id = 1,
2795     .fields = (const VMStateField[]) {
2796         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2797         VMSTATE_UINT64(vring.used, struct VirtQueue),
2798         VMSTATE_END_OF_LIST()
2799     }
2800 };
2801 
2802 static const VMStateDescription vmstate_packed_virtqueue = {
2803     .name = "packed_virtqueue_state",
2804     .version_id = 1,
2805     .minimum_version_id = 1,
2806     .fields = (const VMStateField[]) {
2807         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2808         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2809         VMSTATE_UINT16(used_idx, struct VirtQueue),
2810         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2811         VMSTATE_UINT32(inuse, struct VirtQueue),
2812         VMSTATE_END_OF_LIST()
2813     }
2814 };
2815 
2816 static const VMStateDescription vmstate_virtio_virtqueues = {
2817     .name = "virtio/virtqueues",
2818     .version_id = 1,
2819     .minimum_version_id = 1,
2820     .needed = &virtio_virtqueue_needed,
2821     .fields = (const VMStateField[]) {
2822         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2823                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2824         VMSTATE_END_OF_LIST()
2825     }
2826 };
2827 
2828 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2829     .name = "virtio/packed_virtqueues",
2830     .version_id = 1,
2831     .minimum_version_id = 1,
2832     .needed = &virtio_packed_virtqueue_needed,
2833     .fields = (const VMStateField[]) {
2834         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2835                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2836         VMSTATE_END_OF_LIST()
2837     }
2838 };
2839 
2840 static const VMStateDescription vmstate_ringsize = {
2841     .name = "ringsize_state",
2842     .version_id = 1,
2843     .minimum_version_id = 1,
2844     .fields = (const VMStateField[]) {
2845         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2846         VMSTATE_END_OF_LIST()
2847     }
2848 };
2849 
2850 static const VMStateDescription vmstate_virtio_ringsize = {
2851     .name = "virtio/ringsize",
2852     .version_id = 1,
2853     .minimum_version_id = 1,
2854     .needed = &virtio_ringsize_needed,
2855     .fields = (const VMStateField[]) {
2856         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2857                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2858         VMSTATE_END_OF_LIST()
2859     }
2860 };
2861 
2862 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2863                            const VMStateField *field)
2864 {
2865     VirtIODevice *vdev = pv;
2866     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2867     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2868 
2869     if (!k->load_extra_state) {
2870         return -1;
2871     } else {
2872         return k->load_extra_state(qbus->parent, f);
2873     }
2874 }
2875 
2876 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2877                            const VMStateField *field, JSONWriter *vmdesc)
2878 {
2879     VirtIODevice *vdev = pv;
2880     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2881     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2882 
2883     k->save_extra_state(qbus->parent, f);
2884     return 0;
2885 }
2886 
2887 static const VMStateInfo vmstate_info_extra_state = {
2888     .name = "virtqueue_extra_state",
2889     .get = get_extra_state,
2890     .put = put_extra_state,
2891 };
2892 
2893 static const VMStateDescription vmstate_virtio_extra_state = {
2894     .name = "virtio/extra_state",
2895     .version_id = 1,
2896     .minimum_version_id = 1,
2897     .needed = &virtio_extra_state_needed,
2898     .fields = (const VMStateField[]) {
2899         {
2900             .name         = "extra_state",
2901             .version_id   = 0,
2902             .field_exists = NULL,
2903             .size         = 0,
2904             .info         = &vmstate_info_extra_state,
2905             .flags        = VMS_SINGLE,
2906             .offset       = 0,
2907         },
2908         VMSTATE_END_OF_LIST()
2909     }
2910 };
2911 
2912 static const VMStateDescription vmstate_virtio_device_endian = {
2913     .name = "virtio/device_endian",
2914     .version_id = 1,
2915     .minimum_version_id = 1,
2916     .needed = &virtio_device_endian_needed,
2917     .fields = (const VMStateField[]) {
2918         VMSTATE_UINT8(device_endian, VirtIODevice),
2919         VMSTATE_END_OF_LIST()
2920     }
2921 };
2922 
2923 static const VMStateDescription vmstate_virtio_64bit_features = {
2924     .name = "virtio/64bit_features",
2925     .version_id = 1,
2926     .minimum_version_id = 1,
2927     .needed = &virtio_64bit_features_needed,
2928     .fields = (const VMStateField[]) {
2929         VMSTATE_UINT64(guest_features, VirtIODevice),
2930         VMSTATE_END_OF_LIST()
2931     }
2932 };
2933 
2934 static const VMStateDescription vmstate_virtio_broken = {
2935     .name = "virtio/broken",
2936     .version_id = 1,
2937     .minimum_version_id = 1,
2938     .needed = &virtio_broken_needed,
2939     .fields = (const VMStateField[]) {
2940         VMSTATE_BOOL(broken, VirtIODevice),
2941         VMSTATE_END_OF_LIST()
2942     }
2943 };
2944 
2945 static const VMStateDescription vmstate_virtio_started = {
2946     .name = "virtio/started",
2947     .version_id = 1,
2948     .minimum_version_id = 1,
2949     .needed = &virtio_started_needed,
2950     .fields = (const VMStateField[]) {
2951         VMSTATE_BOOL(started, VirtIODevice),
2952         VMSTATE_END_OF_LIST()
2953     }
2954 };
2955 
2956 static const VMStateDescription vmstate_virtio_disabled = {
2957     .name = "virtio/disabled",
2958     .version_id = 1,
2959     .minimum_version_id = 1,
2960     .needed = &virtio_disabled_needed,
2961     .fields = (const VMStateField[]) {
2962         VMSTATE_BOOL(disabled, VirtIODevice),
2963         VMSTATE_END_OF_LIST()
2964     }
2965 };
2966 
2967 static const VMStateDescription vmstate_virtio = {
2968     .name = "virtio",
2969     .version_id = 1,
2970     .minimum_version_id = 1,
2971     .fields = (const VMStateField[]) {
2972         VMSTATE_END_OF_LIST()
2973     },
2974     .subsections = (const VMStateDescription * const []) {
2975         &vmstate_virtio_device_endian,
2976         &vmstate_virtio_64bit_features,
2977         &vmstate_virtio_virtqueues,
2978         &vmstate_virtio_ringsize,
2979         &vmstate_virtio_broken,
2980         &vmstate_virtio_extra_state,
2981         &vmstate_virtio_started,
2982         &vmstate_virtio_packed_virtqueues,
2983         &vmstate_virtio_disabled,
2984         NULL
2985     }
2986 };
2987 
2988 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2989 {
2990     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2991     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2992     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2993     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2994     int i;
2995 
2996     if (k->save_config) {
2997         k->save_config(qbus->parent, f);
2998     }
2999 
3000     qemu_put_8s(f, &vdev->status);
3001     qemu_put_8s(f, &vdev->isr);
3002     qemu_put_be16s(f, &vdev->queue_sel);
3003     qemu_put_be32s(f, &guest_features_lo);
3004     qemu_put_be32(f, vdev->config_len);
3005     qemu_put_buffer(f, vdev->config, vdev->config_len);
3006 
3007     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3008         if (vdev->vq[i].vring.num == 0)
3009             break;
3010     }
3011 
3012     qemu_put_be32(f, i);
3013 
3014     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3015         if (vdev->vq[i].vring.num == 0)
3016             break;
3017 
3018         qemu_put_be32(f, vdev->vq[i].vring.num);
3019         if (k->has_variable_vring_alignment) {
3020             qemu_put_be32(f, vdev->vq[i].vring.align);
3021         }
3022         /*
3023          * Save desc now, the rest of the ring addresses are saved in
3024          * subsections for VIRTIO-1 devices.
3025          */
3026         qemu_put_be64(f, vdev->vq[i].vring.desc);
3027         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
3028         if (k->save_queue) {
3029             k->save_queue(qbus->parent, i, f);
3030         }
3031     }
3032 
3033     if (vdc->save != NULL) {
3034         vdc->save(vdev, f);
3035     }
3036 
3037     if (vdc->vmsd) {
3038         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
3039         if (ret) {
3040             return ret;
3041         }
3042     }
3043 
3044     /* Subsections */
3045     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
3046 }
3047 
3048 /* A wrapper for use as a VMState .put function */
3049 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
3050                               const VMStateField *field, JSONWriter *vmdesc)
3051 {
3052     return virtio_save(VIRTIO_DEVICE(opaque), f);
3053 }
3054 
3055 /* A wrapper for use as a VMState .get function */
3056 static int coroutine_mixed_fn
3057 virtio_device_get(QEMUFile *f, void *opaque, size_t size,
3058                   const VMStateField *field)
3059 {
3060     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
3061     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
3062 
3063     return virtio_load(vdev, f, dc->vmsd->version_id);
3064 }
3065 
3066 const VMStateInfo  virtio_vmstate_info = {
3067     .name = "virtio",
3068     .get = virtio_device_get,
3069     .put = virtio_device_put,
3070 };
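
/*
 * Sketch of how a device description typically hooks into
 * virtio_vmstate_info: the VMSTATE_VIRTIO_DEVICE field (from
 * hw/virtio/virtio.h) routes the device's migration stream through
 * virtio_save() above and virtio_load() on the destination.
 * The device name and version below are hypothetical.
 */
static const VMStateDescription vmstate_example_virtio_dev = {
    .name = "example-virtio-dev",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};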
3071 
3072 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
3073 {
3074     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3075     bool bad = (val & ~(vdev->host_features)) != 0;
3076 
3077     val &= vdev->host_features;
3078     if (k->set_features) {
3079         k->set_features(vdev, val);
3080     }
3081     vdev->guest_features = val;
3082     return bad ? -1 : 0;
3083 }
3084 
3085 typedef struct VirtioSetFeaturesNocheckData {
3086     Coroutine *co;
3087     VirtIODevice *vdev;
3088     uint64_t val;
3089     int ret;
3090 } VirtioSetFeaturesNocheckData;
3091 
3092 static void virtio_set_features_nocheck_bh(void *opaque)
3093 {
3094     VirtioSetFeaturesNocheckData *data = opaque;
3095 
3096     data->ret = virtio_set_features_nocheck(data->vdev, data->val);
3097     aio_co_wake(data->co);
3098 }
3099 
3100 static int coroutine_mixed_fn
3101 virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
3102 {
3103     if (qemu_in_coroutine()) {
3104         VirtioSetFeaturesNocheckData data = {
3105             .co = qemu_coroutine_self(),
3106             .vdev = vdev,
3107             .val = val,
3108         };
3109         aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
3110                                 virtio_set_features_nocheck_bh, &data);
3111         qemu_coroutine_yield();
3112         return data.ret;
3113     } else {
3114         return virtio_set_features_nocheck(vdev, val);
3115     }
3116 }
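/*
 * virtio_set_features_nocheck_maybe_co() above shows a common QEMU pattern
 * for helpers that may run either inside or outside a coroutine: when in
 * coroutine context, defer the work to a bottom half on the current
 * AioContext and yield until the bottom half wakes the coroutine again.
 * A minimal sketch of the same shape, where do_work() is a hypothetical
 * stand-in for the deferred operation:
 *
 *     typedef struct {
 *         Coroutine *co;
 *         int ret;
 *     } DeferredWork;
 *
 *     static void deferred_work_bh(void *opaque)
 *     {
 *         DeferredWork *w = opaque;
 *
 *         w->ret = do_work();
 *         aio_co_wake(w->co);
 *     }
 *
 *     static int coroutine_mixed_fn run_work_maybe_co(void)
 *     {
 *         if (qemu_in_coroutine()) {
 *             DeferredWork w = { .co = qemu_coroutine_self() };
 *
 *             aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
 *                                     deferred_work_bh, &w);
 *             qemu_coroutine_yield();
 *             return w.ret;
 *         }
 *         return do_work();
 *     }
 */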
3117 
3118 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
3119 {
3120     int ret;
3121     /*
3122      * The driver must not attempt to set features after feature negotiation
3123      * has finished.
3124      */
3125     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
3126         return -EINVAL;
3127     }
3128 
3129     if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
3130         qemu_log_mask(LOG_GUEST_ERROR,
3131                       "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
3132                       __func__, vdev->name);
3133     }
3134 
3135     ret = virtio_set_features_nocheck(vdev, val);
3136     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
3137         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
3138         int i;
3139         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3140             if (vdev->vq[i].vring.num != 0) {
3141                 virtio_init_region_cache(vdev, i);
3142             }
3143         }
3144     }
3145     if (!ret) {
3146         if (!virtio_device_started(vdev, vdev->status) &&
3147             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3148             vdev->start_on_kick = true;
3149         }
3150     }
3151     return ret;
3152 }
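/*
 * virtio_set_features() is the entry point transports use while the guest
 * negotiates features; it refuses writes once FEATURES_OK is set.  A hedged
 * sketch of a hypothetical transport handler for a guest-features write
 * (the function and variable names are illustrative only):
 *
 *     static void transport_set_guest_features(VirtIODevice *vdev,
 *                                              uint64_t features)
 *     {
 *         if (virtio_set_features(vdev, features) < 0) {
 *             // Negotiation already completed, or unsupported bits were
 *             // requested; the transport may refuse to accept FEATURES_OK.
 *         }
 *     }
 */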
3153 
3154 void virtio_reset(void *opaque)
3155 {
3156     VirtIODevice *vdev = opaque;
3157     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3158     int i;
3159 
3160     virtio_set_status(vdev, 0);
3161     if (current_cpu) {
3162         /* Guest initiated reset */
3163         vdev->device_endian = virtio_current_cpu_endian();
3164     } else {
3165         /* System reset */
3166         vdev->device_endian = virtio_default_endian();
3167     }
3168 
3169     if (k->get_vhost) {
3170         struct vhost_dev *hdev = k->get_vhost(vdev);
3171         /* Only reset when vhost back-end is connected */
3172         if (hdev && hdev->vhost_ops) {
3173             vhost_reset_device(hdev);
3174         }
3175     }
3176 
3177     if (k->reset) {
3178         k->reset(vdev);
3179     }
3180 
3181     vdev->start_on_kick = false;
3182     vdev->started = false;
3183     vdev->broken = false;
3184     virtio_set_features_nocheck(vdev, 0);
3185     vdev->queue_sel = 0;
3186     vdev->status = 0;
3187     vdev->disabled = false;
3188     qatomic_set(&vdev->isr, 0);
3189     vdev->config_vector = VIRTIO_NO_VECTOR;
3190     virtio_notify_vector(vdev, vdev->config_vector);
3191 
3192     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3193         __virtio_queue_reset(vdev, i);
3194     }
3195 }
3196 
3197 static void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
3198                                                            Error **errp)
3199 {
3200     VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3201     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
3202     DeviceState *proxy = DEVICE(BUS(bus)->parent);
3203 
3204     if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
3205         k->ioeventfd_enabled(proxy)) {
3206         error_setg(errp,
3207                    "notification_data=on without ioeventfd=off is not supported");
3208     }
3209 }
3210 
3211 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
3212                               uint64_t host_features)
3213 {
3214     size_t config_size = params->min_size;
3215     const VirtIOFeature *feature_sizes = params->feature_sizes;
3216     size_t i;
3217 
3218     for (i = 0; feature_sizes[i].flags != 0; i++) {
3219         if (host_features & feature_sizes[i].flags) {
3220             config_size = MAX(feature_sizes[i].end, config_size);
3221         }
3222     }
3223 
3224     assert(config_size <= params->max_size);
3225     return config_size;
3226 }
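/*
 * A hedged sketch of how a device model might describe a feature-dependent
 * config layout for virtio_get_config_size(); struct foo_config and
 * FOO_F_EXTRA are invented for illustration, and endof() is QEMU's
 * offset-past-end-of-field helper:
 *
 *     static const VirtIOFeature foo_feature_sizes[] = {
 *         { .flags = 1ULL << FOO_F_EXTRA,
 *           .end = endof(struct foo_config, extra) },
 *         {}
 *     };
 *
 *     static const VirtIOConfigSizeParams foo_cfg_size_params = {
 *         .min_size = endof(struct foo_config, base),
 *         .max_size = sizeof(struct foo_config),
 *         .feature_sizes = foo_feature_sizes,
 *     };
 *
 *     size_t sz = virtio_get_config_size(&foo_cfg_size_params,
 *                                        vdev->host_features);
 */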
3227 
3228 int coroutine_mixed_fn
3229 virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3230 {
3231     int i, ret;
3232     int32_t config_len;
3233     uint32_t num;
3234     uint32_t features;
3235     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3236     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3237     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3238 
3239     /*
3240      * We poison the endianness to ensure it does not get used before
3241      * subsections have been loaded.
3242      */
3243     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3244 
3245     if (k->load_config) {
3246         ret = k->load_config(qbus->parent, f);
3247         if (ret)
3248             return ret;
3249     }
3250 
3251     qemu_get_8s(f, &vdev->status);
3252     qemu_get_8s(f, &vdev->isr);
3253     qemu_get_be16s(f, &vdev->queue_sel);
3254     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3255         return -1;
3256     }
3257     qemu_get_be32s(f, &features);
3258 
3259     /*
3260      * Temporarily set guest_features low bits - needed by
3261      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3262      * VIRTIO_NET_F_GUEST_ANNOUNCE, and VIRTIO_NET_F_CTRL_VQ.
3263      *
3264      * Note: devices should always test host features in the future - don't
3265      * create new dependencies like this.
3266      */
3267     vdev->guest_features = features;
3268 
3269     config_len = qemu_get_be32(f);
3270 
3271     /*
3272      * There are cases where the incoming config can be bigger or smaller
3273      * than what we have; so load what we have space for, and skip
3274      * any excess that's in the stream.
3275      */
3276     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3277 
3278     while (config_len > vdev->config_len) {
3279         qemu_get_byte(f);
3280         config_len--;
3281     }
3282 
3283     num = qemu_get_be32(f);
3284 
3285     if (num > VIRTIO_QUEUE_MAX) {
3286         error_report("Invalid number of virtqueues: 0x%x", num);
3287         return -1;
3288     }
3289 
3290     if (vdc->pre_load_queues) {
3291         ret = vdc->pre_load_queues(vdev, num);
3292         if (ret) {
3293             return ret;
3294         }
3295     }
3296 
3297     for (i = 0; i < num; i++) {
3298         vdev->vq[i].vring.num = qemu_get_be32(f);
3299         if (k->has_variable_vring_alignment) {
3300             vdev->vq[i].vring.align = qemu_get_be32(f);
3301         }
3302         vdev->vq[i].vring.desc = qemu_get_be64(f);
3303         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3304         vdev->vq[i].signalled_used_valid = false;
3305         vdev->vq[i].notification = true;
3306 
3307         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3308             error_report("VQ %d address 0x0 "
3309                          "inconsistent with Host index 0x%x",
3310                          i, vdev->vq[i].last_avail_idx);
3311             return -1;
3312         }
3313         if (k->load_queue) {
3314             ret = k->load_queue(qbus->parent, i, f);
3315             if (ret)
3316                 return ret;
3317         }
3318     }
3319 
3320     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3321 
3322     if (vdc->load != NULL) {
3323         ret = vdc->load(vdev, f, version_id);
3324         if (ret) {
3325             return ret;
3326         }
3327     }
3328 
3329     if (vdc->vmsd) {
3330         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3331         if (ret) {
3332             return ret;
3333         }
3334     }
3335 
3336     /* Subsections */
3337     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3338     if (ret) {
3339         return ret;
3340     }
3341 
3342     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3343         vdev->device_endian = virtio_default_endian();
3344     }
3345 
3346     if (virtio_64bit_features_needed(vdev)) {
3347         /*
3348          * Subsection load filled vdev->guest_features.  Run them
3349          * through virtio_set_features to sanity-check them against
3350          * host_features.
3351          */
3352         uint64_t features64 = vdev->guest_features;
3353         if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
3354             error_report("Features 0x%" PRIx64 " unsupported. "
3355                          "Allowed features: 0x%" PRIx64,
3356                          features64, vdev->host_features);
3357             return -1;
3358         }
3359     } else {
3360         if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
3361             error_report("Features 0x%x unsupported. "
3362                          "Allowed features: 0x%" PRIx64,
3363                          features, vdev->host_features);
3364             return -1;
3365         }
3366     }
3367 
3368     if (!virtio_device_started(vdev, vdev->status) &&
3369         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3370         vdev->start_on_kick = true;
3371     }
3372 
3373     RCU_READ_LOCK_GUARD();
3374     for (i = 0; i < num; i++) {
3375         if (vdev->vq[i].vring.desc) {
3376             uint16_t nheads;
3377 
3378             /*
3379              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3380              * only the region cache needs to be set up.  Legacy devices need
3381              * to calculate used and avail ring addresses based on the desc
3382              * address.
3383              */
3384             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3385                 virtio_init_region_cache(vdev, i);
3386             } else {
3387                 virtio_queue_update_rings(vdev, i);
3388             }
3389 
3390             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3391                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3392                 vdev->vq[i].shadow_avail_wrap_counter =
3393                                         vdev->vq[i].last_avail_wrap_counter;
3394                 continue;
3395             }
3396 
3397             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3398             /* Check it isn't doing strange things with descriptor numbers. */
3399             if (nheads > vdev->vq[i].vring.num) {
3400                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3401                              "inconsistent with Host index 0x%x: delta 0x%x",
3402                              i, vdev->vq[i].vring.num,
3403                              vring_avail_idx(&vdev->vq[i]),
3404                              vdev->vq[i].last_avail_idx, nheads);
3405                 vdev->vq[i].used_idx = 0;
3406                 vdev->vq[i].shadow_avail_idx = 0;
3407                 vdev->vq[i].inuse = 0;
3408                 continue;
3409             }
3410             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3411             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3412 
3413             /*
3414              * Some devices migrate VirtQueueElements that have been popped
3415              * from the avail ring but not yet returned to the used ring.
3416              * Since max ring size < UINT16_MAX it's safe to use modulo
3417              * UINT16_MAX + 1 subtraction.
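             * For example, last_avail_idx == 0x0002 and used_idx == 0xfffe
             * yields (uint16_t)(0x0002 - 0xfffe) == 4 in-flight elements.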
3418              */
3419             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3420                                 vdev->vq[i].used_idx);
3421             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3422                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3423                              "used_idx 0x%x",
3424                              i, vdev->vq[i].vring.num,
3425                              vdev->vq[i].last_avail_idx,
3426                              vdev->vq[i].used_idx);
3427                 return -1;
3428             }
3429         }
3430     }
3431 
3432     if (vdc->post_load) {
3433         ret = vdc->post_load(vdev);
3434         if (ret) {
3435             return ret;
3436         }
3437     }
3438 
3439     return 0;
3440 }
3441 
3442 void virtio_cleanup(VirtIODevice *vdev)
3443 {
3444     qemu_del_vm_change_state_handler(vdev->vmstate);
3445 }
3446 
3447 static int virtio_vmstate_change(void *opaque, bool running, RunState state)
3448 {
3449     VirtIODevice *vdev = opaque;
3450     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3451     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3452     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3453     vdev->vm_running = running;
3454 
3455     if (backend_run) {
3456         virtio_set_status(vdev, vdev->status);
3457     }
3458 
3459     if (k->vmstate_change) {
3460         k->vmstate_change(qbus->parent, backend_run);
3461     }
3462 
3463     if (!backend_run) {
3464         int ret = virtio_set_status(vdev, vdev->status);
3465         if (ret) {
3466             return ret;
3467         }
3468     }
3469     return 0;
3470 }
3471 
3472 void virtio_instance_init_common(Object *proxy_obj, void *data,
3473                                  size_t vdev_size, const char *vdev_name)
3474 {
3475     DeviceState *vdev = data;
3476 
3477     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3478                                        vdev_size, vdev_name, &error_abort,
3479                                        NULL);
3480     qdev_alias_all_properties(vdev, proxy_obj);
3481 }
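/*
 * Transport proxies call virtio_instance_init_common() from their QOM
 * instance_init so the backing VirtIODevice becomes a child object with its
 * properties aliased onto the proxy.  Hedged sketch for a hypothetical proxy
 * type (FooPCI, FOO_PCI and TYPE_FOO_DEVICE are illustrative names):
 *
 *     static void foo_pci_instance_init(Object *obj)
 *     {
 *         FooPCI *dev = FOO_PCI(obj);
 *
 *         virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 *                                     TYPE_FOO_DEVICE);
 *     }
 */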
3482 
3483 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3484 {
3485     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3486     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3487     int i;
3488     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3489 
3490     if (nvectors) {
3491         vdev->vector_queues =
3492             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3493     }
3494 
3495     vdev->start_on_kick = false;
3496     vdev->started = false;
3497     vdev->vhost_started = false;
3498     vdev->device_id = device_id;
3499     vdev->status = 0;
3500     qatomic_set(&vdev->isr, 0);
3501     vdev->queue_sel = 0;
3502     vdev->config_vector = VIRTIO_NO_VECTOR;
3503     vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3504     vdev->vm_running = runstate_is_running();
3505     vdev->broken = false;
3506     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3507         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3508         vdev->vq[i].vdev = vdev;
3509         vdev->vq[i].queue_index = i;
3510         vdev->vq[i].host_notifier_enabled = false;
3511     }
3512 
3513     vdev->name = virtio_id_to_name(device_id);
3514     vdev->config_len = config_size;
3515     if (vdev->config_len) {
3516         vdev->config = g_malloc0(config_size);
3517     } else {
3518         vdev->config = NULL;
3519     }
3520     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3521             NULL, virtio_vmstate_change, vdev);
3522     vdev->device_endian = virtio_default_endian();
3523     vdev->use_guest_notifier_mask = true;
3524 }
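/*
 * Device models normally call virtio_init() from their realize hook and
 * undo it with virtio_cleanup() on the error or unrealize path.  A minimal
 * hedged sketch (foo_device_realize is a hypothetical device; the device ID
 * and config struct come from the standard virtio headers included above):
 *
 *     static void foo_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *
 *         virtio_init(vdev, VIRTIO_ID_BLOCK,
 *                     sizeof(struct virtio_blk_config));
 *         // ... add virtqueues and device-specific state here ...
 *     }
 */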
3525 
3526 /*
3527  * Only devices that have already been around prior to defining the virtio
3528  * standard support legacy mode; this includes devices not specified in the
3529  * standard. All newer devices conform to the virtio standard only.
3530  */
3531 bool virtio_legacy_allowed(VirtIODevice *vdev)
3532 {
3533     switch (vdev->device_id) {
3534     case VIRTIO_ID_NET:
3535     case VIRTIO_ID_BLOCK:
3536     case VIRTIO_ID_CONSOLE:
3537     case VIRTIO_ID_RNG:
3538     case VIRTIO_ID_BALLOON:
3539     case VIRTIO_ID_RPMSG:
3540     case VIRTIO_ID_SCSI:
3541     case VIRTIO_ID_9P:
3542     case VIRTIO_ID_RPROC_SERIAL:
3543     case VIRTIO_ID_CAIF:
3544         return true;
3545     default:
3546         return false;
3547     }
3548 }
3549 
3550 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3551 {
3552     return vdev->disable_legacy_check;
3553 }
3554 
3555 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3556 {
3557     return vdev->vq[n].vring.desc;
3558 }
3559 
3560 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3561 {
3562     return virtio_queue_get_desc_addr(vdev, n) != 0;
3563 }
3564 
3565 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3566 {
3567     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3568     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3569 
3570     if (k->queue_enabled) {
3571         return k->queue_enabled(qbus->parent, n);
3572     }
3573     return virtio_queue_enabled_legacy(vdev, n);
3574 }
3575 
3576 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3577 {
3578     return vdev->vq[n].vring.avail;
3579 }
3580 
3581 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3582 {
3583     return vdev->vq[n].vring.used;
3584 }
3585 
3586 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3587 {
3588     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3589 }
3590 
3591 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3592 {
3593     int s;
3594 
3595     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3596         return sizeof(struct VRingPackedDescEvent);
3597     }
3598 
3599     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3600     return offsetof(VRingAvail, ring) +
3601         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3602 }
3603 
3604 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3605 {
3606     int s;
3607 
3608     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3609         return sizeof(struct VRingPackedDescEvent);
3610     }
3611 
3612     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3613     return offsetof(VRingUsed, ring) +
3614         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3615 }
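/*
 * Worked example for the split-ring size helpers above, assuming a 256-entry
 * queue with VIRTIO_RING_F_EVENT_IDX negotiated:
 *   desc:  16 * 256         = 4096 bytes
 *   avail:  4 + 2 * 256 + 2 =  518 bytes
 *   used:   4 + 8 * 256 + 2 = 2054 bytes
 */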
3616 
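/*
 * For packed rings, the helpers below fold both indices and their wrap
 * counters into a single 32-bit value:
 *   bits  0..14  last_avail_idx
 *   bit      15  last_avail_wrap_counter
 *   bits 16..30  used_idx
 *   bit      31  used_wrap_counter
 */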
3617 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3618                                                            int n)
3619 {
3620     unsigned int avail, used;
3621 
3622     avail = vdev->vq[n].last_avail_idx;
3623     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3624 
3625     used = vdev->vq[n].used_idx;
3626     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3627 
3628     return avail | used << 16;
3629 }
3630 
3631 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3632                                                       int n)
3633 {
3634     return vdev->vq[n].last_avail_idx;
3635 }
3636 
3637 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3638 {
3639     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3640         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3641     } else {
3642         return virtio_queue_split_get_last_avail_idx(vdev, n);
3643     }
3644 }
3645 
3646 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3647                                                    int n, unsigned int idx)
3648 {
3649     struct VirtQueue *vq = &vdev->vq[n];
3650 
3651     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3652     vq->last_avail_wrap_counter =
3653         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3654     idx >>= 16;
3655     vq->used_idx = idx & 0x7fff;
3656     vq->used_wrap_counter = !!(idx & 0x8000);
3657 }
3658 
3659 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3660                                                   int n, unsigned int idx)
3661 {
3662     vdev->vq[n].last_avail_idx = idx;
3663     vdev->vq[n].shadow_avail_idx = idx;
3664 }
3665 
3666 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3667                                      unsigned int idx)
3668 {
3669     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3670         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3671     } else {
3672         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3673     }
3674 }
3675 
3676 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3677                                                        int n)
3678 {
3679     /* We don't have a reference like avail idx in shared memory */
3680 }
3681 
3682 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3683                                                       int n)
3684 {
3685     RCU_READ_LOCK_GUARD();
3686     if (vdev->vq[n].vring.desc) {
3687         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3688         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3689     }
3690 }
3691 
3692 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3693 {
3694     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3695         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3696     } else {
3697         virtio_queue_split_restore_last_avail_idx(vdev, n);
3698     }
3699 }
3700 
3701 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3702 {
3703     /* used idx was updated through set_last_avail_idx() */
3704 }
3705 
3706 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3707 {
3708     RCU_READ_LOCK_GUARD();
3709     if (vdev->vq[n].vring.desc) {
3710         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3711     }
3712 }
3713 
3714 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3715 {
3716     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3717         return virtio_queue_packed_update_used_idx(vdev, n);
3718     } else {
3719         return virtio_queue_split_update_used_idx(vdev, n);
3720     }
3721 }
3722 
3723 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3724 {
3725     vdev->vq[n].signalled_used_valid = false;
3726 }
3727 
3728 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3729 {
3730     return vdev->vq + n;
3731 }
3732 
3733 uint16_t virtio_get_queue_index(VirtQueue *vq)
3734 {
3735     return vq->queue_index;
3736 }
3737 
3738 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3739 {
3740     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3741     if (event_notifier_test_and_clear(n)) {
3742         virtio_irq(vq);
3743     }
3744 }
3745 static void virtio_config_guest_notifier_read(EventNotifier *n)
3746 {
3747     VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3748 
3749     if (event_notifier_test_and_clear(n)) {
3750         virtio_notify_config(vdev);
3751     }
3752 }
3753 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3754                                                 bool with_irqfd)
3755 {
3756     if (assign && !with_irqfd) {
3757         event_notifier_set_handler(&vq->guest_notifier,
3758                                    virtio_queue_guest_notifier_read);
3759     } else {
3760         event_notifier_set_handler(&vq->guest_notifier, NULL);
3761     }
3762     if (!assign) {
3763         /* Test and clear notifier before closing it,
3764          * in case poll callback didn't have time to run. */
3765         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3766     }
3767 }
3768 
3769 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3770                                                  bool assign, bool with_irqfd)
3771 {
3772     EventNotifier *n;
3773     n = &vdev->config_notifier;
3774     if (assign && !with_irqfd) {
3775         event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3776     } else {
3777         event_notifier_set_handler(n, NULL);
3778     }
3779     if (!assign) {
3780         /* Test and clear notifier before closing it,
3781          * in case poll callback didn't have time to run. */
3782         virtio_config_guest_notifier_read(n);
3783     }
3784 }
3785 
3786 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3787 {
3788     return &vq->guest_notifier;
3789 }
3790 
3791 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3792 {
3793     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3794 
3795     virtio_queue_set_notification(vq, 0);
3796 }
3797 
3798 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3799 {
3800     EventNotifier *n = opaque;
3801     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3802 
3803     return vq->vring.desc && !virtio_queue_empty(vq);
3804 }
3805 
3806 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3807 {
3808     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3809 
3810     virtio_queue_notify_vq(vq);
3811 }
3812 
3813 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3814 {
3815     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3816 
3817     /* Caller polls once more after this to catch requests that race with us */
3818     virtio_queue_set_notification(vq, 1);
3819 }
3820 
3821 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3822 {
3823     /*
3824      * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
3825      * Re-enable them.  (And if detach has not been used before, notifications
3826      * being enabled is still the default state while a notifier is attached;
3827      * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
3828      * notifications enabled once the polling section is left.)
3829      */
3830     if (!virtio_queue_get_notification(vq)) {
3831         virtio_queue_set_notification(vq, 1);
3832     }
3833 
3834     aio_set_event_notifier(ctx, &vq->host_notifier,
3835                            virtio_queue_host_notifier_read,
3836                            virtio_queue_host_notifier_aio_poll,
3837                            virtio_queue_host_notifier_aio_poll_ready);
3838     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3839                                 virtio_queue_host_notifier_aio_poll_begin,
3840                                 virtio_queue_host_notifier_aio_poll_end);
3841 
3842     /*
3843      * We will have ignored notifications about new requests from the guest
3844      * while no notifiers were attached, so "kick" the virt queue to process
3845      * those requests now.
3846      */
3847     event_notifier_set(&vq->host_notifier);
3848 }
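/*
 * A hedged usage sketch: a device's dataplane start/stop path typically
 * pairs the attach and detach helpers around the period during which the
 * queue is serviced in an IOThread's AioContext (vq and ctx are assumed to
 * come from the device's own setup code):
 *
 *     // start: service the queue from ctx
 *     virtio_queue_aio_attach_host_notifier(vq, ctx);
 *     // ...
 *     // stop: stop servicing the queue from ctx
 *     virtio_queue_aio_detach_host_notifier(vq, ctx);
 */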
3849 
3850 /*
3851  * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3852  * this for rx virtqueues and similar cases where the virtqueue handler
3853  * function does not pop all elements. When the virtqueue is left non-empty,
3854  * polling consumes CPU cycles and should not be used.
3855  */
3856 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3857 {
3858     /* See virtio_queue_aio_attach_host_notifier() */
3859     if (!virtio_queue_get_notification(vq)) {
3860         virtio_queue_set_notification(vq, 1);
3861     }
3862 
3863     aio_set_event_notifier(ctx, &vq->host_notifier,
3864                            virtio_queue_host_notifier_read,
3865                            NULL, NULL);
3866 
3867     /*
3868      * See virtio_queue_aio_attach_host_notifier().
3869      * Note that this may be unnecessary for the type of virtqueues this
3870      * function is used for.  Still, it will not hurt to have a quick look into
3871      * whether we can/should process any of the virtqueue elements.
3872      */
3873     event_notifier_set(&vq->host_notifier);
3874 }
3875 
3876 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3877 {
3878     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
3879 
3880     /*
3881      * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
3882      * will run after io_poll_begin(), so by removing the notifier, we do not
3883      * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
3884      * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
3885      * notifications are enabled or disabled.  It does not really matter anyway;
3886      * we just removed the notifier, so we do not care about notifications until
3887      * we potentially re-attach it.  The attach_host_notifier functions will
3888      * ensure that notifications are enabled again when they are needed.
3889      */
3890 }
3891 
3892 void virtio_queue_host_notifier_read(EventNotifier *n)
3893 {
3894     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3895     if (event_notifier_test_and_clear(n)) {
3896         virtio_queue_notify_vq(vq);
3897     }
3898 }
3899 
3900 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3901 {
3902     return &vq->host_notifier;
3903 }
3904 
3905 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3906 {
3907     return &vdev->config_notifier;
3908 }
3909 
3910 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3911 {
3912     vq->host_notifier_enabled = enabled;
3913 }
3914 
3915 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3916                                       MemoryRegion *mr, bool assign)
3917 {
3918     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3919     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3920 
3921     if (k->set_host_notifier_mr) {
3922         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3923     }
3924 
3925     return -1;
3926 }
3927 
3928 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3929 {
3930     g_free(vdev->bus_name);
3931     vdev->bus_name = g_strdup(bus_name);
3932 }
3933 
3934 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3935 {
3936     va_list ap;
3937 
3938     va_start(ap, fmt);
3939     error_vreport(fmt, ap);
3940     va_end(ap);
3941 
3942     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3943         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3944         virtio_notify_config(vdev);
3945     }
3946 
3947     vdev->broken = true;
3948 }
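/*
 * Device models report guest-visible protocol violations through
 * virtio_error(); for VIRTIO 1.0+ devices this marks the device as needing a
 * reset rather than terminating QEMU.  Hedged usage sketch (out_num stands
 * for a popped element's descriptor count):
 *
 *     if (out_num < 1) {
 *         virtio_error(vdev, "%s: missing request header", vdev->name);
 *         return;
 *     }
 */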
3949 
3950 static void virtio_memory_listener_commit(MemoryListener *listener)
3951 {
3952     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3953     int i;
3954 
3955     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3956         if (vdev->vq[i].vring.num == 0) {
3957             break;
3958         }
3959         virtio_init_region_cache(vdev, i);
3960     }
3961 }
3962 
3963 static void virtio_device_realize(DeviceState *dev, Error **errp)
3964 {
3965     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3966     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3967     Error *err = NULL;
3968 
3969     /* Devices should either use vmsd or the load/save methods */
3970     assert(!vdc->vmsd || !vdc->load);
3971 
3972     if (vdc->realize != NULL) {
3973         vdc->realize(dev, &err);
3974         if (err != NULL) {
3975             error_propagate(errp, err);
3976             return;
3977         }
3978     }
3979 
3980     /* Devices should not use both ioeventfd and notification data feature */
3981     virtio_device_check_notification_compatibility(vdev, &err);
3982     if (err != NULL) {
3983         error_propagate(errp, err);
3984         vdc->unrealize(dev);
3985         return;
3986     }
3987 
3988     virtio_bus_device_plugged(vdev, &err);
3989     if (err != NULL) {
3990         error_propagate(errp, err);
3991         vdc->unrealize(dev);
3992         return;
3993     }
3994 
3995     vdev->listener.commit = virtio_memory_listener_commit;
3996     vdev->listener.name = "virtio";
3997     memory_listener_register(&vdev->listener, vdev->dma_as);
3998 }
3999 
4000 static void virtio_device_unrealize(DeviceState *dev)
4001 {
4002     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4003     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4004 
4005     memory_listener_unregister(&vdev->listener);
4006     virtio_bus_device_unplugged(vdev);
4007 
4008     if (vdc->unrealize != NULL) {
4009         vdc->unrealize(dev);
4010     }
4011 
4012     g_free(vdev->bus_name);
4013     vdev->bus_name = NULL;
4014 }
4015 
4016 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
4017 {
4018     int i;
4019     if (!vdev->vq) {
4020         return;
4021     }
4022 
4023     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4024         if (vdev->vq[i].vring.num == 0) {
4025             break;
4026         }
4027         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
4028     }
4029     g_free(vdev->vq);
4030 }
4031 
4032 static void virtio_device_instance_finalize(Object *obj)
4033 {
4034     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
4035 
4036     virtio_device_free_virtqueues(vdev);
4037 
4038     g_free(vdev->config);
4039     g_free(vdev->vector_queues);
4040 }
4041 
4042 static const Property virtio_properties[] = {
4043     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
4044     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
4045     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
4046     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
4047                      disable_legacy_check, false),
4048 };
4049 
4050 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
4051 {
4052     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4053     int i, n, r, err;
4054 
4055     /*
4056      * Batch all the host notifiers in a single transaction to avoid
4057      * quadratic time complexity in address_space_update_ioeventfds().
4058      */
4059     memory_region_transaction_begin();
4060     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4061         VirtQueue *vq = &vdev->vq[n];
4062         if (!virtio_queue_get_num(vdev, n)) {
4063             continue;
4064         }
4065         r = virtio_bus_set_host_notifier(qbus, n, true);
4066         if (r < 0) {
4067             err = r;
4068             goto assign_error;
4069         }
4070         event_notifier_set_handler(&vq->host_notifier,
4071                                    virtio_queue_host_notifier_read);
4072     }
4073 
4074     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4075         /* Kick right away to begin processing requests already in vring */
4076         VirtQueue *vq = &vdev->vq[n];
4077         if (!vq->vring.num) {
4078             continue;
4079         }
4080         event_notifier_set(&vq->host_notifier);
4081     }
4082     memory_region_transaction_commit();
4083     return 0;
4084 
4085 assign_error:
4086     i = n; /* save n for a second iteration after transaction is committed. */
4087     while (--n >= 0) {
4088         VirtQueue *vq = &vdev->vq[n];
4089         if (!virtio_queue_get_num(vdev, n)) {
4090             continue;
4091         }
4092 
4093         event_notifier_set_handler(&vq->host_notifier, NULL);
4094         r = virtio_bus_set_host_notifier(qbus, n, false);
4095         assert(r >= 0);
4096     }
4097     /*
4098      * The transaction expects the ioeventfds to be open when it
4099      * commits. Do it now, before the cleanup loop.
4100      */
4101     memory_region_transaction_commit();
4102 
4103     while (--i >= 0) {
4104         if (!virtio_queue_get_num(vdev, i)) {
4105             continue;
4106         }
4107         virtio_bus_cleanup_host_notifier(qbus, i);
4108     }
4109     return err;
4110 }
4111 
4112 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
4113 {
4114     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4115     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4116 
4117     return virtio_bus_start_ioeventfd(vbus);
4118 }
4119 
4120 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
4121 {
4122     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4123     int n, r;
4124 
4125     /*
4126      * Batch all the host notifiers in a single transaction to avoid
4127      * quadratic time complexity in address_space_update_ioeventfds().
4128      */
4129     memory_region_transaction_begin();
4130     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4131         VirtQueue *vq = &vdev->vq[n];
4132 
4133         if (!virtio_queue_get_num(vdev, n)) {
4134             continue;
4135         }
4136         event_notifier_set_handler(&vq->host_notifier, NULL);
4137         r = virtio_bus_set_host_notifier(qbus, n, false);
4138         assert(r >= 0);
4139     }
4140     /*
4141      * The transaction expects the ioeventfds to be open when it
4142      * commits. Do it now, before the cleanup loop.
4143      */
4144     memory_region_transaction_commit();
4145 
4146     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4147         if (!virtio_queue_get_num(vdev, n)) {
4148             continue;
4149         }
4150         virtio_bus_cleanup_host_notifier(qbus, n);
4151     }
4152 }
4153 
4154 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
4155 {
4156     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4157     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4158 
4159     return virtio_bus_grab_ioeventfd(vbus);
4160 }
4161 
4162 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
4163 {
4164     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4165     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4166 
4167     virtio_bus_release_ioeventfd(vbus);
4168 }
4169 
4170 static void virtio_device_class_init(ObjectClass *klass, const void *data)
4171 {
4172     /* Set the default value here. */
4173     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4174     DeviceClass *dc = DEVICE_CLASS(klass);
4175 
4176     dc->realize = virtio_device_realize;
4177     dc->unrealize = virtio_device_unrealize;
4178     dc->bus_type = TYPE_VIRTIO_BUS;
4179     device_class_set_props(dc, virtio_properties);
4180     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
4181     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
4182 
4183     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
4184 }
4185 
4186 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
4187 {
4188     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4189     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4190 
4191     return virtio_bus_ioeventfd_enabled(vbus);
4192 }
4193 
4194 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4195                                                  uint16_t queue,
4196                                                  Error **errp)
4197 {
4198     VirtIODevice *vdev;
4199     VirtQueueStatus *status;
4200 
4201     vdev = qmp_find_virtio_device(path);
4202     if (vdev == NULL) {
4203         error_setg(errp, "Path %s is not a VirtIODevice", path);
4204         return NULL;
4205     }
4206 
4207     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4208         error_setg(errp, "Invalid virtqueue number %d", queue);
4209         return NULL;
4210     }
4211 
4212     status = g_new0(VirtQueueStatus, 1);
4213     status->name = g_strdup(vdev->name);
4214     status->queue_index = vdev->vq[queue].queue_index;
4215     status->inuse = vdev->vq[queue].inuse;
4216     status->vring_num = vdev->vq[queue].vring.num;
4217     status->vring_num_default = vdev->vq[queue].vring.num_default;
4218     status->vring_align = vdev->vq[queue].vring.align;
4219     status->vring_desc = vdev->vq[queue].vring.desc;
4220     status->vring_avail = vdev->vq[queue].vring.avail;
4221     status->vring_used = vdev->vq[queue].vring.used;
4222     status->used_idx = vdev->vq[queue].used_idx;
4223     status->signalled_used = vdev->vq[queue].signalled_used;
4224     status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4225 
4226     if (vdev->vhost_started) {
4227         VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4228         struct vhost_dev *hdev = vdc->get_vhost(vdev);
4229 
4230         /* check if vq index exists for vhost as well  */
4231         if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4232             status->has_last_avail_idx = true;
4233 
4234             int vhost_vq_index =
4235                 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4236             struct vhost_vring_state state = {
4237                 .index = vhost_vq_index,
4238             };
4239 
4240             status->last_avail_idx =
4241                 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4242         }
4243     } else {
4244         status->has_shadow_avail_idx = true;
4245         status->has_last_avail_idx = true;
4246         status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4247         status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4248     }
4249 
4250     return status;
4251 }
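/*
 * This backs the x-query-virtio-queue-status QMP command (the name follows
 * the usual qmp_ prefix convention).  Illustrative round trip; the path,
 * queue number and returned fields are examples only:
 *
 *     -> { "execute": "x-query-virtio-queue-status",
 *          "arguments": { "path": "/machine/.../virtio-backend",
 *                         "queue": 0 } }
 *     <- { "return": { "name": "virtio-net", ... } }
 */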
4252 
4253 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4254 {
4255     strList *list = NULL;
4256     strList *node;
4257     int i;
4258 
4259     struct {
4260         uint16_t flag;
4261         const char *value;
4262     } map[] = {
4263         { VRING_DESC_F_NEXT, "next" },
4264         { VRING_DESC_F_WRITE, "write" },
4265         { VRING_DESC_F_INDIRECT, "indirect" },
4266         { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4267         { 1 << VRING_PACKED_DESC_F_USED, "used" },
4268         { 0, "" }
4269     };
4270 
4271     for (i = 0; map[i].flag; i++) {
4272         if ((map[i].flag & flags) == 0) {
4273             continue;
4274         }
4275         node = g_malloc0(sizeof(strList));
4276         node->value = g_strdup(map[i].value);
4277         node->next = list;
4278         list = node;
4279     }
4280 
4281     return list;
4282 }
4283 
4284 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4285                                                      uint16_t queue,
4286                                                      bool has_index,
4287                                                      uint16_t index,
4288                                                      Error **errp)
4289 {
4290     VirtIODevice *vdev;
4291     VirtQueue *vq;
4292     VirtioQueueElement *element = NULL;
4293 
4294     vdev = qmp_find_virtio_device(path);
4295     if (vdev == NULL) {
4296         error_setg(errp, "Path %s is not a VirtIO device", path);
4297         return NULL;
4298     }
4299 
4300     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4301         error_setg(errp, "Invalid virtqueue number %d", queue);
4302         return NULL;
4303     }
4304     vq = &vdev->vq[queue];
4305 
4306     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4307         error_setg(errp, "Packed ring not supported");
4308         return NULL;
4309     } else {
4310         unsigned int head, i, max;
4311         VRingMemoryRegionCaches *caches;
4312         MemoryRegionCache indirect_desc_cache;
4313         MemoryRegionCache *desc_cache;
4314         VRingDesc desc;
4315         VirtioRingDescList *list = NULL;
4316         VirtioRingDescList *node;
4317         int rc; int ndescs;
4318 
4319         address_space_cache_init_empty(&indirect_desc_cache);
4320 
4321         RCU_READ_LOCK_GUARD();
4322 
4323         max = vq->vring.num;
4324 
4325         if (!has_index) {
4326             head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4327         } else {
4328             head = vring_avail_ring(vq, index % vq->vring.num);
4329         }
4330         i = head;
4331 
4332         caches = vring_get_region_caches(vq);
4333         if (!caches) {
4334             error_setg(errp, "Region caches not initialized");
4335             return NULL;
4336         }
4337         if (caches->desc.len < max * sizeof(VRingDesc)) {
4338             error_setg(errp, "Cannot map descriptor ring");
4339             return NULL;
4340         }
4341 
4342         desc_cache = &caches->desc;
4343         vring_split_desc_read(vdev, &desc, desc_cache, i);
4344         if (desc.flags & VRING_DESC_F_INDIRECT) {
4345             int64_t len;
4346             len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4347                                            desc.addr, desc.len, false);
4348             desc_cache = &indirect_desc_cache;
4349             if (len < desc.len) {
4350                 error_setg(errp, "Cannot map indirect buffer");
4351                 goto done;
4352             }
4353 
4354             max = desc.len / sizeof(VRingDesc);
4355             i = 0;
4356             vring_split_desc_read(vdev, &desc, desc_cache, i);
4357         }
4358 
4359         element = g_new0(VirtioQueueElement, 1);
4360         element->avail = g_new0(VirtioRingAvail, 1);
4361         element->used = g_new0(VirtioRingUsed, 1);
4362         element->name = g_strdup(vdev->name);
4363         element->index = head;
4364         element->avail->flags = vring_avail_flags(vq);
4365         element->avail->idx = vring_avail_idx(vq);
4366         element->avail->ring = head;
4367         element->used->flags = vring_used_flags(vq);
4368         element->used->idx = vring_used_idx(vq);
4369         ndescs = 0;
4370 
4371         do {
4372             /* A buggy driver may produce an infinite loop */
4373             if (ndescs >= max) {
4374                 break;
4375             }
4376             node = g_new0(VirtioRingDescList, 1);
4377             node->value = g_new0(VirtioRingDesc, 1);
4378             node->value->addr = desc.addr;
4379             node->value->len = desc.len;
4380             node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4381             node->next = list;
4382             list = node;
4383 
4384             ndescs++;
4385             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
4386         } while (rc == VIRTQUEUE_READ_DESC_MORE);
4387         element->descs = list;
4388 done:
4389         address_space_cache_destroy(&indirect_desc_cache);
4390     }
4391 
4392     return element;
4393 }
4394 
4395 static const TypeInfo virtio_device_info = {
4396     .name = TYPE_VIRTIO_DEVICE,
4397     .parent = TYPE_DEVICE,
4398     .instance_size = sizeof(VirtIODevice),
4399     .class_init = virtio_device_class_init,
4400     .instance_finalize = virtio_device_instance_finalize,
4401     .abstract = true,
4402     .class_size = sizeof(VirtioDeviceClass),
4403 };
4404 
4405 static void virtio_register_types(void)
4406 {
4407     type_register_static(&virtio_device_info);
4408 }
4409 
4410 type_init(virtio_register_types)
4411 
4412 QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
4413                                    QEMUBHFunc *cb, void *opaque,
4414                                    const char *name)
4415 {
4416     DeviceState *transport = qdev_get_parent_bus(dev)->parent;
4417 
4418     return qemu_bh_new_full(cb, opaque, name,
4419                             &transport->mem_reentrancy_guard);
4420 }
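/*
 * Creating a bottom half through this helper ties it to the transport
 * device's memory re-entrancy guard, so the handler cannot be re-entered
 * through MMIO while it is already running.  Hedged usage sketch
 * (foo_bh_handler and s are illustrative):
 *
 *     s->bh = virtio_bh_new_guarded_full(DEVICE(vdev), foo_bh_handler, s,
 *                                        "foo-bh");
 */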
4421