xref: /openbmc/qemu/hw/virtio/virtio.c (revision d307c28ca9dba7a0677035c9244198b05164c873)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "cpu.h"
18 #include "trace.h"
19 #include "exec/address-spaces.h"
20 #include "qemu/error-report.h"
21 #include "hw/virtio/virtio.h"
22 #include "qemu/atomic.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "migration/migration.h"
25 #include "hw/virtio/virtio-access.h"
26 
27 /*
28  * The alignment to use between consumer and producer parts of vring.
29  * This is the x86 page size: the default, used by transports like PCI
30  * which don't provide a means for the guest to tell the host the alignment.
31  */
32 #define VIRTIO_PCI_VRING_ALIGN         4096
33 
34 typedef struct VRingDesc
35 {
36     uint64_t addr;
37     uint32_t len;
38     uint16_t flags;
39     uint16_t next;
40 } VRingDesc;
41 
42 typedef struct VRingAvail
43 {
44     uint16_t flags;
45     uint16_t idx;
46     uint16_t ring[0];
47 } VRingAvail;
48 
49 typedef struct VRingUsedElem
50 {
51     uint32_t id;
52     uint32_t len;
53 } VRingUsedElem;
54 
55 typedef struct VRingUsed
56 {
57     uint16_t flags;
58     uint16_t idx;
59     VRingUsedElem ring[0];
60 } VRingUsed;
61 
62 typedef struct VRing
63 {
64     unsigned int num;
65     unsigned int num_default;
66     unsigned int align;
67     hwaddr desc;
68     hwaddr avail;
69     hwaddr used;
70 } VRing;
71 
72 struct VirtQueue
73 {
74     VRing vring;
75 
76     /* Next head to pop */
77     uint16_t last_avail_idx;
78 
79     /* Last avail_idx read from VQ. */
80     uint16_t shadow_avail_idx;
81 
82     uint16_t used_idx;
83 
84     /* Last used index value we have signalled on */
85     uint16_t signalled_used;
86 
87     /* Whether signalled_used holds a valid value */
88     bool signalled_used_valid;
89 
90     /* Nested guest->host notification disabled counter */
91     unsigned int notification_disabled;
92 
93     uint16_t queue_index;
94 
95     int inuse;
96 
97     uint16_t vector;
98     VirtIOHandleOutput handle_output;
99     VirtIOHandleOutput handle_aio_output;
100     VirtIODevice *vdev;
101     EventNotifier guest_notifier;
102     EventNotifier host_notifier;
103     QLIST_ENTRY(VirtQueue) node;
104 };
105 
106 /* virt queue functions */
107 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
108 {
109     VRing *vring = &vdev->vq[n].vring;
110 
111     if (!vring->desc) {
112         /* not yet setup -> nothing to do */
113         return;
114     }
115     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
116     vring->used = vring_align(vring->avail +
117                               offsetof(VRingAvail, ring[vring->num]),
118                               vring->align);
119 }
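
/*
 * A worked example of the layout computed above, assuming a 256-entry
 * ring and the default 4096-byte alignment: the descriptor table
 * occupies 256 * sizeof(VRingDesc) = 4096 bytes, the avail ring starts
 * right after it and spans offsetof(VRingAvail, ring[256]) = 516 bytes,
 * so the used ring lands at the next 4096-byte boundary, desc + 8192.
 */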
120 
121 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
122                             hwaddr desc_pa, int i)
123 {
124     address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
125                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
126     virtio_tswap64s(vdev, &desc->addr);
127     virtio_tswap32s(vdev, &desc->len);
128     virtio_tswap16s(vdev, &desc->flags);
129     virtio_tswap16s(vdev, &desc->next);
130 }
131 
132 static inline uint16_t vring_avail_flags(VirtQueue *vq)
133 {
134     hwaddr pa;
135     pa = vq->vring.avail + offsetof(VRingAvail, flags);
136     return virtio_lduw_phys(vq->vdev, pa);
137 }
138 
139 static inline uint16_t vring_avail_idx(VirtQueue *vq)
140 {
141     hwaddr pa;
142     pa = vq->vring.avail + offsetof(VRingAvail, idx);
143     vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
144     return vq->shadow_avail_idx;
145 }
146 
147 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
148 {
149     hwaddr pa;
150     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
151     return virtio_lduw_phys(vq->vdev, pa);
152 }
153 
154 static inline uint16_t vring_get_used_event(VirtQueue *vq)
155 {
156     return vring_avail_ring(vq, vq->vring.num);
157 }
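
/*
 * With VIRTIO_RING_F_EVENT_IDX negotiated, the guest publishes its
 * used_event value in the slot just past the avail ring,
 * avail->ring[num], which is why it is fetched with
 * vring_avail_ring(vq, vq->vring.num) above.  Symmetrically, the host's
 * avail_event is stored just past the used ring (see
 * vring_set_avail_event() below).
 */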
158 
159 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
160                                     int i)
161 {
162     hwaddr pa;
163     virtio_tswap32s(vq->vdev, &uelem->id);
164     virtio_tswap32s(vq->vdev, &uelem->len);
165     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
166     address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
167                        (void *)uelem, sizeof(VRingUsedElem));
168 }
169 
170 static uint16_t vring_used_idx(VirtQueue *vq)
171 {
172     hwaddr pa;
173     pa = vq->vring.used + offsetof(VRingUsed, idx);
174     return virtio_lduw_phys(vq->vdev, pa);
175 }
176 
177 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
178 {
179     hwaddr pa;
180     pa = vq->vring.used + offsetof(VRingUsed, idx);
181     virtio_stw_phys(vq->vdev, pa, val);
182     vq->used_idx = val;
183 }
184 
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
186 {
187     VirtIODevice *vdev = vq->vdev;
188     hwaddr pa;
189     pa = vq->vring.used + offsetof(VRingUsed, flags);
190     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
191 }
192 
193 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
194 {
195     VirtIODevice *vdev = vq->vdev;
196     hwaddr pa;
197     pa = vq->vring.used + offsetof(VRingUsed, flags);
198     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
199 }
200 
201 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
202 {
203     hwaddr pa;
204     if (vq->notification_disabled) {
205         return;
206     }
207     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
208     virtio_stw_phys(vq->vdev, pa, val);
209 }
210 
211 void virtio_queue_set_notification(VirtQueue *vq, int enable)
212 {
213     if (enable) {
214         assert(vq->notification_disabled > 0);
215         vq->notification_disabled--;
216     } else {
217         vq->notification_disabled++;
218     }
219 
220     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
221         vring_set_avail_event(vq, vring_avail_idx(vq));
222     } else if (enable) {
223         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
224     } else {
225         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
226     }
227     if (enable) {
228         /* Expose avail event/used flags before caller checks the avail idx. */
229         smp_mb();
230     }
231 }
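
/*
 * This suppresses guest->host notifications (kicks), and it nests: each
 * disable increments the counter and each enable decrements it.  The
 * guest is told to skip kicks either via the avail event index or, when
 * VIRTIO_RING_F_EVENT_IDX was not negotiated, via the
 * VRING_USED_F_NO_NOTIFY flag.  The final smp_mb() publishes that
 * update before the caller re-checks the avail index for buffers the
 * guest added while notifications were off.
 */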
232 
233 int virtio_queue_ready(VirtQueue *vq)
234 {
235     return vq->vring.avail != 0;
236 }
237 
238 /* Fetch avail_idx from VQ memory only when we really need to know if
239  * the guest has added some buffers. */
240 int virtio_queue_empty(VirtQueue *vq)
241 {
242     if (vq->shadow_avail_idx != vq->last_avail_idx) {
243         return 0;
244     }
245 
246     return vring_avail_idx(vq) == vq->last_avail_idx;
247 }
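
/*
 * shadow_avail_idx is the copy of avail->idx fetched by the last
 * vring_avail_idx() call; when it already differs from last_avail_idx
 * the queue is known to be non-empty and the access to guest memory can
 * be skipped entirely.
 */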
248 
249 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
250                                unsigned int len)
251 {
252     unsigned int offset;
253     int i;
254 
255     offset = 0;
256     for (i = 0; i < elem->in_num; i++) {
257         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
258 
259         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
260                                   elem->in_sg[i].iov_len,
261                                   1, size);
262 
263         offset += size;
264     }
265 
266     for (i = 0; i < elem->out_num; i++) {
267         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
268                                   elem->out_sg[i].iov_len,
269                                   0, elem->out_sg[i].iov_len);
    }
270 }
271 
272 /* virtqueue_detach_element:
273  * @vq: The #VirtQueue
274  * @elem: The #VirtQueueElement
275  * @len: number of bytes written
276  *
277  * Detach the element from the virtqueue.  This function is suitable for device
278  * reset or other situations where a #VirtQueueElement is simply freed and will
279  * not be pushed or discarded.
280  */
281 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
282                               unsigned int len)
283 {
284     vq->inuse--;
285     virtqueue_unmap_sg(vq, elem, len);
286 }
287 
288 /* virtqueue_unpop:
289  * @vq: The #VirtQueue
290  * @elem: The #VirtQueueElement
291  * @len: number of bytes written
292  *
293  * Pretend the most recent element wasn't popped from the virtqueue.  The next
294  * call to virtqueue_pop() will refetch the element.
295  */
296 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
297                      unsigned int len)
298 {
299     vq->last_avail_idx--;
300     virtqueue_detach_element(vq, elem, len);
301 }
302 
303 /* virtqueue_rewind:
304  * @vq: The #VirtQueue
305  * @num: Number of elements to push back
306  *
307  * Pretend that elements weren't popped from the virtqueue.  The next
308  * virtqueue_pop() will refetch the oldest element.
309  *
310  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
311  *
312  * Returns: true on success, false if @num is greater than the number of in use
313  * elements.
314  */
315 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
316 {
317     if (num > vq->inuse) {
318         return false;
319     }
320     vq->last_avail_idx -= num;
321     vq->inuse -= num;
322     return true;
323 }
324 
325 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
326                     unsigned int len, unsigned int idx)
327 {
328     VRingUsedElem uelem;
329 
330     trace_virtqueue_fill(vq, elem, len, idx);
331 
332     virtqueue_unmap_sg(vq, elem, len);
333 
334     if (unlikely(vq->vdev->broken)) {
335         return;
336     }
337 
338     idx = (idx + vq->used_idx) % vq->vring.num;
339 
340     uelem.id = elem->index;
341     uelem.len = len;
342     vring_used_write(vq, &uelem, idx);
343 }
344 
345 void virtqueue_flush(VirtQueue *vq, unsigned int count)
346 {
347     uint16_t old, new;
348 
349     if (unlikely(vq->vdev->broken)) {
350         vq->inuse -= count;
351         return;
352     }
353 
354     /* Make sure buffer is written before we update index. */
355     smp_wmb();
356     trace_virtqueue_flush(vq, count);
357     old = vq->used_idx;
358     new = old + count;
359     vring_used_idx_set(vq, new);
360     vq->inuse -= count;
361     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
362         vq->signalled_used_valid = false;
    }
363 }
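
/*
 * The unlikely() test above catches the 16-bit used index wrapping past
 * signalled_used within one flush.  When that happens signalled_used
 * can no longer be trusted, so it is marked invalid and the next
 * virtio_should_notify() will signal the guest unconditionally.
 */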
364 
365 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
366                     unsigned int len)
367 {
368     virtqueue_fill(vq, elem, len, 0);
369     virtqueue_flush(vq, 1);
370 }
371 
372 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
373 {
374     uint16_t num_heads = vring_avail_idx(vq) - idx;
375 
376     /* Check it isn't doing very strange things with descriptor numbers. */
377     if (num_heads > vq->vring.num) {
378         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
379                      idx, vq->shadow_avail_idx);
380         return -EINVAL;
381     }
382     /* On success, callers read a descriptor at vq->last_avail_idx.
383      * Make sure descriptor read does not bypass avail index read. */
384     if (num_heads) {
385         smp_rmb();
386     }
387 
388     return num_heads;
389 }
390 
391 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
392                                unsigned int *head)
393 {
394     /* Grab the next descriptor number they're advertising, and increment
395      * the index we've seen. */
396     *head = vring_avail_ring(vq, idx % vq->vring.num);
397 
398     /* If their number is silly, that's a fatal mistake. */
399     if (*head >= vq->vring.num) {
400         virtio_error(vq->vdev, "Guest says index %u is available", *head);
401         return false;
402     }
403 
404     return true;
405 }
406 
407 enum {
408     VIRTQUEUE_READ_DESC_ERROR = -1,
409     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
410     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
411 };
412 
413 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
414                                     hwaddr desc_pa, unsigned int max,
415                                     unsigned int *next)
416 {
417     /* If this descriptor says it doesn't chain, we're done. */
418     if (!(desc->flags & VRING_DESC_F_NEXT)) {
419         return VIRTQUEUE_READ_DESC_DONE;
420     }
421 
422     /* Check they're not leading us off the end of the descriptor table. */
423     *next = desc->next;
424     /* Make sure compiler knows to grab that: we don't want it changing! */
425     smp_wmb();
426 
427     if (*next >= max) {
428         virtio_error(vdev, "Desc next is %u", *next);
429         return VIRTQUEUE_READ_DESC_ERROR;
430     }
431 
432     vring_desc_read(vdev, desc, desc_pa, *next);
433     return VIRTQUEUE_READ_DESC_MORE;
434 }
435 
436 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
437                                unsigned int *out_bytes,
438                                unsigned max_in_bytes, unsigned max_out_bytes)
439 {
440     unsigned int idx;
441     unsigned int total_bufs, in_total, out_total;
442     int rc;
443 
444     idx = vq->last_avail_idx;
445 
446     total_bufs = in_total = out_total = 0;
447     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
448         VirtIODevice *vdev = vq->vdev;
449         unsigned int max, num_bufs, indirect = 0;
450         VRingDesc desc;
451         hwaddr desc_pa;
452         unsigned int i;
453 
454         max = vq->vring.num;
455         num_bufs = total_bufs;
456 
457         if (!virtqueue_get_head(vq, idx++, &i)) {
458             goto err;
459         }
460 
461         desc_pa = vq->vring.desc;
462         vring_desc_read(vdev, &desc, desc_pa, i);
463 
464         if (desc.flags & VRING_DESC_F_INDIRECT) {
465             if (desc.len % sizeof(VRingDesc)) {
466                 virtio_error(vdev, "Invalid size for indirect buffer table");
467                 goto err;
468             }
469 
470             /* If we've got too many, that implies a descriptor loop. */
471             if (num_bufs >= max) {
472                 virtio_error(vdev, "Looped descriptor");
473                 goto err;
474             }
475 
476             /* loop over the indirect descriptor table */
477             indirect = 1;
478             max = desc.len / sizeof(VRingDesc);
479             desc_pa = desc.addr;
480             num_bufs = i = 0;
481             vring_desc_read(vdev, &desc, desc_pa, i);
482         }
483 
484         do {
485             /* If we've got too many, that implies a descriptor loop. */
486             if (++num_bufs > max) {
487                 virtio_error(vdev, "Looped descriptor");
488                 goto err;
489             }
490 
491             if (desc.flags & VRING_DESC_F_WRITE) {
492                 in_total += desc.len;
493             } else {
494                 out_total += desc.len;
495             }
496             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
497                 goto done;
498             }
499 
500             rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
501         } while (rc == VIRTQUEUE_READ_DESC_MORE);
502 
503         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
504             goto err;
505         }
506 
507         if (!indirect) {
508             total_bufs = num_bufs;
509         } else {
510             total_bufs++;
        }
511     }
512 
513     if (rc < 0) {
514         goto err;
515     }
516 
517 done:
518     if (in_bytes) {
519         *in_bytes = in_total;
520     }
521     if (out_bytes) {
522         *out_bytes = out_total;
523     }
524     return;
525 
526 err:
527     in_total = out_total = 0;
528     goto done;
529 }
530 
531 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
532                           unsigned int out_bytes)
533 {
534     unsigned int in_total, out_total;
535 
536     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
537     return in_bytes <= in_total && out_bytes <= out_total;
538 }
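
/*
 * For example, a device that needs a 12-byte device-writable area and
 * reads nothing could poll with virtqueue_avail_bytes(vq, 12, 0) before
 * committing to a virtqueue_pop().
 */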
539 
540 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
541                                hwaddr *addr, struct iovec *iov,
542                                unsigned int max_num_sg, bool is_write,
543                                hwaddr pa, size_t sz)
544 {
545     bool ok = false;
546     unsigned num_sg = *p_num_sg;
547     assert(num_sg <= max_num_sg);
548 
549     if (!sz) {
550         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
551         goto out;
552     }
553 
554     while (sz) {
555         hwaddr len = sz;
556 
557         if (num_sg == max_num_sg) {
558             virtio_error(vdev, "virtio: too many write descriptors in "
559                                "indirect table");
560             goto out;
561         }
562 
563         iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
564         if (!iov[num_sg].iov_base) {
565             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
566             goto out;
567         }
568 
569         iov[num_sg].iov_len = len;
570         addr[num_sg] = pa;
571 
572         sz -= len;
573         pa += len;
574         num_sg++;
575     }
576     ok = true;
577 
578 out:
579     *p_num_sg = num_sg;
580     return ok;
581 }
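
/*
 * cpu_physical_memory_map() may map less than was asked for, e.g. when
 * a buffer crosses a memory region boundary, so a single descriptor can
 * fan out into several iovec entries; the loop above keeps mapping the
 * remainder until the descriptor is fully covered or iov space runs
 * out.
 */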
582 
583 /* Only used by error code paths before we have a VirtQueueElement (therefore
584  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
585  * yet.
586  */
587 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
588                                     struct iovec *iov)
589 {
590     unsigned int i;
591 
592     for (i = 0; i < out_num + in_num; i++) {
593         int is_write = i >= out_num;
594 
595         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
596         iov++;
597     }
598 }
599 
600 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
601                                 unsigned int *num_sg, unsigned int max_size,
602                                 int is_write)
603 {
604     unsigned int i;
605     hwaddr len;
606 
607     /* Note: this function MUST validate input, some callers
608      * are passing in num_sg values received over the network.
609      */
610     /* TODO: teach all callers that this can fail, and return failure instead
611      * of asserting here.
612      * When we do, we might be able to re-enable NDEBUG below.
613      */
614 #ifdef NDEBUG
615 #error building with NDEBUG is not supported
616 #endif
617     assert(*num_sg <= max_size);
618 
619     for (i = 0; i < *num_sg; i++) {
620         len = sg[i].iov_len;
621         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
622         if (!sg[i].iov_base) {
623             error_report("virtio: error trying to map MMIO memory");
624             exit(1);
625         }
626         if (len != sg[i].iov_len) {
627             error_report("virtio: unexpected memory split");
628             exit(1);
629         }
630     }
631 }
632 
633 void virtqueue_map(VirtQueueElement *elem)
634 {
635     virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
636                         VIRTQUEUE_MAX_SIZE, 1);
637     virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
638                         VIRTQUEUE_MAX_SIZE, 0);
639 }
640 
641 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
642 {
643     VirtQueueElement *elem;
644     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
645     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
646     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
647     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
648     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
649     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
650 
651     assert(sz >= sizeof(VirtQueueElement));
652     elem = g_malloc(out_sg_end);
653     elem->out_num = out_num;
654     elem->in_num = in_num;
655     elem->in_addr = (void *)elem + in_addr_ofs;
656     elem->out_addr = (void *)elem + out_addr_ofs;
657     elem->in_sg = (void *)elem + in_sg_ofs;
658     elem->out_sg = (void *)elem + out_sg_ofs;
659     return elem;
660 }
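
/*
 * Everything lives in one allocation: the caller's struct (sz bytes,
 * beginning with the VirtQueueElement header) is followed, suitably
 * aligned, by the in_addr[]/out_addr[] arrays and then the
 * in_sg[]/out_sg[] arrays.  A single g_free() releases it all, and the
 * arrays are sized for this request rather than the VIRTQUEUE_MAX_SIZE
 * worst case.
 */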
661 
662 void *virtqueue_pop(VirtQueue *vq, size_t sz)
663 {
664     unsigned int i, head, max;
665     hwaddr desc_pa = vq->vring.desc;
666     VirtIODevice *vdev = vq->vdev;
667     VirtQueueElement *elem;
668     unsigned out_num, in_num;
669     hwaddr addr[VIRTQUEUE_MAX_SIZE];
670     struct iovec iov[VIRTQUEUE_MAX_SIZE];
671     VRingDesc desc;
672     int rc;
673 
674     if (unlikely(vdev->broken)) {
675         return NULL;
676     }
677     if (virtio_queue_empty(vq)) {
678         return NULL;
679     }
680     /* Needed after virtio_queue_empty(), see comment in
681      * virtqueue_num_heads(). */
682     smp_rmb();
683 
684     /* When we start there are neither input nor output buffers. */
685     out_num = in_num = 0;
686 
687     max = vq->vring.num;
688 
689     if (vq->inuse >= vq->vring.num) {
690         virtio_error(vdev, "Virtqueue size exceeded");
691         return NULL;
692     }
693 
694     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
695         return NULL;
696     }
697 
698     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
699         vring_set_avail_event(vq, vq->last_avail_idx);
700     }
701 
702     i = head;
703     vring_desc_read(vdev, &desc, desc_pa, i);
704     if (desc.flags & VRING_DESC_F_INDIRECT) {
705         if (desc.len % sizeof(VRingDesc)) {
706             virtio_error(vdev, "Invalid size for indirect buffer table");
707             return NULL;
708         }
709 
710         /* loop over the indirect descriptor table */
711         max = desc.len / sizeof(VRingDesc);
712         desc_pa = desc.addr;
713         i = 0;
714         vring_desc_read(vdev, &desc, desc_pa, i);
715     }
716 
717     /* Collect all the descriptors */
718     do {
719         bool map_ok;
720 
721         if (desc.flags & VRING_DESC_F_WRITE) {
722             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
723                                         iov + out_num,
724                                         VIRTQUEUE_MAX_SIZE - out_num, true,
725                                         desc.addr, desc.len);
726         } else {
727             if (in_num) {
728                 virtio_error(vdev, "Incorrect order for descriptors");
729                 goto err_undo_map;
730             }
731             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
732                                         VIRTQUEUE_MAX_SIZE, false,
733                                         desc.addr, desc.len);
734         }
735         if (!map_ok) {
736             goto err_undo_map;
737         }
738 
739         /* If we've got too many, that implies a descriptor loop. */
740         if ((in_num + out_num) > max) {
741             virtio_error(vdev, "Looped descriptor");
742             goto err_undo_map;
743         }
744 
745         rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
746     } while (rc == VIRTQUEUE_READ_DESC_MORE);
747 
748     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
749         goto err_undo_map;
750     }
751 
752     /* Now copy what we have collected and mapped */
753     elem = virtqueue_alloc_element(sz, out_num, in_num);
754     elem->index = head;
755     for (i = 0; i < out_num; i++) {
756         elem->out_addr[i] = addr[i];
757         elem->out_sg[i] = iov[i];
758     }
759     for (i = 0; i < in_num; i++) {
760         elem->in_addr[i] = addr[out_num + i];
761         elem->in_sg[i] = iov[out_num + i];
762     }
763 
764     vq->inuse++;
765 
766     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
767     return elem;
768 
769 err_undo_map:
770     virtqueue_undo_map_desc(out_num, in_num, iov);
771     return NULL;
772 }
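
/*
 * An element returned by virtqueue_pop() stays accounted in vq->inuse
 * until the device completes it with virtqueue_push() (or
 * virtqueue_fill() plus virtqueue_flush()), or gives it back with
 * virtqueue_detach_element() / virtqueue_unpop().
 */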
773 
774 /* Reading and writing a structure directly to QEMUFile is *awful*, but
775  * it is what QEMU has always done by mistake.  We can change it sooner
776  * or later by bumping the version number of the affected vm states.
777  * In the meantime, since the in-memory layout of VirtQueueElement
778  * has changed, we need to marshal to and from the layout that was
779  * used before the change.
780  */
781 typedef struct VirtQueueElementOld {
782     unsigned int index;
783     unsigned int out_num;
784     unsigned int in_num;
785     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
786     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
787     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
788     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
789 } VirtQueueElementOld;
790 
791 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
792 {
793     VirtQueueElement *elem;
794     VirtQueueElementOld data;
795     int i;
796 
797     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
798 
799     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
800     elem->index = data.index;
801 
802     for (i = 0; i < elem->in_num; i++) {
803         elem->in_addr[i] = data.in_addr[i];
804     }
805 
806     for (i = 0; i < elem->out_num; i++) {
807         elem->out_addr[i] = data.out_addr[i];
808     }
809 
810     for (i = 0; i < elem->in_num; i++) {
811         /* Base is overwritten by virtqueue_map.  */
812         elem->in_sg[i].iov_base = 0;
813         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
814     }
815 
816     for (i = 0; i < elem->out_num; i++) {
817         /* Base is overwritten by virtqueue_map.  */
818         elem->out_sg[i].iov_base = 0;
819         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
820     }
821 
822     virtqueue_map(elem);
823     return elem;
824 }
825 
826 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
827 {
828     VirtQueueElementOld data;
829     int i;
830 
831     memset(&data, 0, sizeof(data));
832     data.index = elem->index;
833     data.in_num = elem->in_num;
834     data.out_num = elem->out_num;
835 
836     for (i = 0; i < elem->in_num; i++) {
837         data.in_addr[i] = elem->in_addr[i];
838     }
839 
840     for (i = 0; i < elem->out_num; i++) {
841         data.out_addr[i] = elem->out_addr[i];
842     }
843 
844     for (i = 0; i < elem->in_num; i++) {
845         /* Base is overwritten by virtqueue_map when loading.  Do not
846          * save it, as it would leak the QEMU address space layout.  */
847         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
848     }
849 
850     for (i = 0; i < elem->out_num; i++) {
851         /* Do not save iov_base as above.  */
852         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
853     }
854     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
855 }
856 
857 /* virtio device */
858 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
859 {
860     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
861     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
862 
863     if (unlikely(vdev->broken)) {
864         return;
865     }
866 
867     if (k->notify) {
868         k->notify(qbus->parent, vector);
869     }
870 }
871 
872 void virtio_update_irq(VirtIODevice *vdev)
873 {
874     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
875 }
876 
877 static int virtio_validate_features(VirtIODevice *vdev)
878 {
879     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
880 
881     if (k->validate_features) {
882         return k->validate_features(vdev);
883     } else {
884         return 0;
885     }
886 }
887 
888 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
889 {
890     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
891     trace_virtio_set_status(vdev, val);
892 
893     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
894         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
895             val & VIRTIO_CONFIG_S_FEATURES_OK) {
896             int ret = virtio_validate_features(vdev);
897 
898             if (ret) {
899                 return ret;
900             }
901         }
902     }
903     if (k->set_status) {
904         k->set_status(vdev, val);
905     }
906     vdev->status = val;
907     return 0;
908 }
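
/*
 * For VIRTIO 1.0 devices, the 0 -> 1 transition of FEATURES_OK is the
 * one point where a feature combination can still be refused: a
 * non-zero return from the device's validate_features hook propagates
 * out so the transport can reject the status write.
 */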
909 
910 bool target_words_bigendian(void);
911 static enum virtio_device_endian virtio_default_endian(void)
912 {
913     if (target_words_bigendian()) {
914         return VIRTIO_DEVICE_ENDIAN_BIG;
915     } else {
916         return VIRTIO_DEVICE_ENDIAN_LITTLE;
917     }
918 }
919 
920 static enum virtio_device_endian virtio_current_cpu_endian(void)
921 {
922     CPUClass *cc = CPU_GET_CLASS(current_cpu);
923 
924     if (cc->virtio_is_big_endian(current_cpu)) {
925         return VIRTIO_DEVICE_ENDIAN_BIG;
926     } else {
927         return VIRTIO_DEVICE_ENDIAN_LITTLE;
928     }
929 }
930 
931 void virtio_reset(void *opaque)
932 {
933     VirtIODevice *vdev = opaque;
934     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
935     int i;
936 
937     virtio_set_status(vdev, 0);
938     if (current_cpu) {
939         /* Guest initiated reset */
940         vdev->device_endian = virtio_current_cpu_endian();
941     } else {
942         /* System reset */
943         vdev->device_endian = virtio_default_endian();
944     }
945 
946     if (k->reset) {
947         k->reset(vdev);
948     }
949 
950     vdev->broken = false;
951     vdev->guest_features = 0;
952     vdev->queue_sel = 0;
953     vdev->status = 0;
954     atomic_set(&vdev->isr, 0);
955     vdev->config_vector = VIRTIO_NO_VECTOR;
956     virtio_notify_vector(vdev, vdev->config_vector);
957 
958     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
959         vdev->vq[i].vring.desc = 0;
960         vdev->vq[i].vring.avail = 0;
961         vdev->vq[i].vring.used = 0;
962         vdev->vq[i].last_avail_idx = 0;
963         vdev->vq[i].shadow_avail_idx = 0;
964         vdev->vq[i].used_idx = 0;
965         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
966         vdev->vq[i].signalled_used = 0;
967         vdev->vq[i].signalled_used_valid = false;
968         vdev->vq[i].notification_disabled = 0;
969         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
970         vdev->vq[i].inuse = 0;
971     }
972 }
973 
974 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
975 {
976     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
977     uint8_t val;
978 
979     if (addr + sizeof(val) > vdev->config_len) {
980         return (uint32_t)-1;
981     }
982 
983     k->get_config(vdev, vdev->config);
984 
985     val = ldub_p(vdev->config + addr);
986     return val;
987 }
988 
989 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
990 {
991     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
992     uint16_t val;
993 
994     if (addr + sizeof(val) > vdev->config_len) {
995         return (uint32_t)-1;
996     }
997 
998     k->get_config(vdev, vdev->config);
999 
1000     val = lduw_p(vdev->config + addr);
1001     return val;
1002 }
1003 
1004 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1005 {
1006     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1007     uint32_t val;
1008 
1009     if (addr + sizeof(val) > vdev->config_len) {
1010         return (uint32_t)-1;
1011     }
1012 
1013     k->get_config(vdev, vdev->config);
1014 
1015     val = ldl_p(vdev->config + addr);
1016     return val;
1017 }
1018 
1019 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1020 {
1021     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1022     uint8_t val = data;
1023 
1024     if (addr + sizeof(val) > vdev->config_len) {
1025         return;
1026     }
1027 
1028     stb_p(vdev->config + addr, val);
1029 
1030     if (k->set_config) {
1031         k->set_config(vdev, vdev->config);
1032     }
1033 }
1034 
1035 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1036 {
1037     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1038     uint16_t val = data;
1039 
1040     if (addr + sizeof(val) > vdev->config_len) {
1041         return;
1042     }
1043 
1044     stw_p(vdev->config + addr, val);
1045 
1046     if (k->set_config) {
1047         k->set_config(vdev, vdev->config);
1048     }
1049 }
1050 
1051 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1052 {
1053     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1054     uint32_t val = data;
1055 
1056     if (addr + sizeof(val) > vdev->config_len) {
1057         return;
1058     }
1059 
1060     stl_p(vdev->config + addr, val);
1061 
1062     if (k->set_config) {
1063         k->set_config(vdev, vdev->config);
1064     }
1065 }
1066 
1067 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1068 {
1069     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1070     uint8_t val;
1071 
1072     if (addr + sizeof(val) > vdev->config_len) {
1073         return (uint32_t)-1;
1074     }
1075 
1076     k->get_config(vdev, vdev->config);
1077 
1078     val = ldub_p(vdev->config + addr);
1079     return val;
1080 }
1081 
1082 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1083 {
1084     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1085     uint16_t val;
1086 
1087     if (addr + sizeof(val) > vdev->config_len) {
1088         return (uint32_t)-1;
1089     }
1090 
1091     k->get_config(vdev, vdev->config);
1092 
1093     val = lduw_le_p(vdev->config + addr);
1094     return val;
1095 }
1096 
1097 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
1098 {
1099     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1100     uint32_t val;
1101 
1102     if (addr + sizeof(val) > vdev->config_len) {
1103         return (uint32_t)-1;
1104     }
1105 
1106     k->get_config(vdev, vdev->config);
1107 
1108     val = ldl_le_p(vdev->config + addr);
1109     return val;
1110 }
1111 
1112 void virtio_config_modern_writeb(VirtIODevice *vdev,
1113                                  uint32_t addr, uint32_t data)
1114 {
1115     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1116     uint8_t val = data;
1117 
1118     if (addr + sizeof(val) > vdev->config_len) {
1119         return;
1120     }
1121 
1122     stb_p(vdev->config + addr, val);
1123 
1124     if (k->set_config) {
1125         k->set_config(vdev, vdev->config);
1126     }
1127 }
1128 
1129 void virtio_config_modern_writew(VirtIODevice *vdev,
1130                                  uint32_t addr, uint32_t data)
1131 {
1132     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1133     uint16_t val = data;
1134 
1135     if (addr + sizeof(val) > vdev->config_len) {
1136         return;
1137     }
1138 
1139     stw_le_p(vdev->config + addr, val);
1140 
1141     if (k->set_config) {
1142         k->set_config(vdev, vdev->config);
1143     }
1144 }
1145 
1146 void virtio_config_modern_writel(VirtIODevice *vdev,
1147                                  uint32_t addr, uint32_t data)
1148 {
1149     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1150     uint32_t val = data;
1151 
1152     if (addr + sizeof(val) > vdev->config_len) {
1153         return;
1154     }
1155 
1156     stl_le_p(vdev->config + addr, val);
1157 
1158     if (k->set_config) {
1159         k->set_config(vdev, vdev->config);
1160     }
1161 }
1162 
1163 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1164 {
1165     vdev->vq[n].vring.desc = addr;
1166     virtio_queue_update_rings(vdev, n);
1167 }
1168 
1169 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1170 {
1171     return vdev->vq[n].vring.desc;
1172 }
1173 
1174 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1175                             hwaddr avail, hwaddr used)
1176 {
1177     vdev->vq[n].vring.desc = desc;
1178     vdev->vq[n].vring.avail = avail;
1179     vdev->vq[n].vring.used = used;
1180 }
1181 
1182 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1183 {
1184     /* Don't allow guest to flip queue between existent and
1185      * nonexistent states, or to set it to an invalid size.
1186      */
1187     if (!!num != !!vdev->vq[n].vring.num ||
1188         num > VIRTQUEUE_MAX_SIZE ||
1189         num < 0) {
1190         return;
1191     }
1192     vdev->vq[n].vring.num = num;
1193 }
1194 
1195 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1196 {
1197     return QLIST_FIRST(&vdev->vector_queues[vector]);
1198 }
1199 
1200 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1201 {
1202     return QLIST_NEXT(vq, node);
1203 }
1204 
1205 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1206 {
1207     return vdev->vq[n].vring.num;
1208 }
1209 
1210 int virtio_get_num_queues(VirtIODevice *vdev)
1211 {
1212     int i;
1213 
1214     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1215         if (!virtio_queue_get_num(vdev, i)) {
1216             break;
1217         }
1218     }
1219 
1220     return i;
1221 }
1222 
1223 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1224 {
1225     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1226     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1227 
1228     /* virtio-1 compliant devices cannot change the alignment */
1229     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1230         error_report("tried to modify queue alignment for virtio-1 device");
1231         return;
1232     }
1233     /* Check that the transport told us it was going to do this
1234      * (so a buggy transport will immediately assert rather than
1235      * silently failing to migrate this state)
1236      */
1237     assert(k->has_variable_vring_alignment);
1238 
1239     vdev->vq[n].vring.align = align;
1240     virtio_queue_update_rings(vdev, n);
1241 }
1242 
1243 static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1244 {
1245     if (vq->vring.desc && vq->handle_aio_output) {
1246         VirtIODevice *vdev = vq->vdev;
1247 
1248         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1249         vq->handle_aio_output(vdev, vq);
1250     }
1251 }
1252 
1253 static void virtio_queue_notify_vq(VirtQueue *vq)
1254 {
1255     if (vq->vring.desc && vq->handle_output) {
1256         VirtIODevice *vdev = vq->vdev;
1257 
1258         if (unlikely(vdev->broken)) {
1259             return;
1260         }
1261 
1262         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1263         vq->handle_output(vdev, vq);
1264     }
1265 }
1266 
1267 void virtio_queue_notify(VirtIODevice *vdev, int n)
1268 {
1269     virtio_queue_notify_vq(&vdev->vq[n]);
1270 }
1271 
1272 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1273 {
1274     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1275         VIRTIO_NO_VECTOR;
1276 }
1277 
1278 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1279 {
1280     VirtQueue *vq = &vdev->vq[n];
1281 
1282     if (n < VIRTIO_QUEUE_MAX) {
1283         if (vdev->vector_queues &&
1284             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1285             QLIST_REMOVE(vq, node);
1286         }
1287         vdev->vq[n].vector = vector;
1288         if (vdev->vector_queues &&
1289             vector != VIRTIO_NO_VECTOR) {
1290             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1291         }
1292     }
1293 }
1294 
1295 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1296                             VirtIOHandleOutput handle_output)
1297 {
1298     int i;
1299 
1300     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1301         if (vdev->vq[i].vring.num == 0) {
1302             break;
        }
1303     }
1304 
1305     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
1306         abort();
    }
1307 
1308     vdev->vq[i].vring.num = queue_size;
1309     vdev->vq[i].vring.num_default = queue_size;
1310     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1311     vdev->vq[i].handle_output = handle_output;
1312     vdev->vq[i].handle_aio_output = NULL;
1313 
1314     return &vdev->vq[i];
1315 }
1316 
1317 void virtio_del_queue(VirtIODevice *vdev, int n)
1318 {
1319     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1320         abort();
1321     }
1322 
1323     vdev->vq[n].vring.num = 0;
1324     vdev->vq[n].vring.num_default = 0;
1325 }
1326 
1327 static void virtio_set_isr(VirtIODevice *vdev, int value)
1328 {
1329     uint8_t old = atomic_read(&vdev->isr);
1330 
1331     /* Do not write ISR if it does not change, so that its cacheline remains
1332      * shared in the common case where the guest does not read it.
1333      */
1334     if ((old & value) != value) {
1335         atomic_or(&vdev->isr, value);
1336     }
1337 }
1338 
1339 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1340 {
1341     uint16_t old, new;
1342     bool v;
1343     /* We need to expose used array entries before checking used event. */
1344     smp_mb();
1345     /* Always notify when the queue is empty, if the feature was acknowledged */
1346     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1347         !vq->inuse && virtio_queue_empty(vq)) {
1348         return true;
1349     }
1350 
1351     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1352         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1353     }
1354 
1355     v = vq->signalled_used_valid;
1356     vq->signalled_used_valid = true;
1357     old = vq->signalled_used;
1358     new = vq->signalled_used = vq->used_idx;
1359     return !v || vring_need_event(vring_get_used_event(vq), new, old);
1360 }
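
/*
 * vring_need_event() implements the event index check from the virtio
 * spec: the guest is signalled only if used_event, the index at which
 * the guest asked to be woken, was crossed by the batch of entries
 * published between old and new, with all arithmetic modulo 2^16.
 */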
1361 
1362 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
1363 {
1364     if (!virtio_should_notify(vdev, vq)) {
1365         return;
1366     }
1367 
1368     trace_virtio_notify_irqfd(vdev, vq);
1369 
1370     /*
1371      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
1372      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
1373      * incorrectly polling this bit during crashdump and hibernation
1374      * in MSI mode, causing a hang if this bit is never updated.
1375      * Recent releases of Windows do not really shut down, but rather
1376      * log out and hibernate to make the next startup faster.  Hence,
1377      * this manifested as a more serious hang during shutdown with
1378      * these drivers installed.
1379      * The next driver release, from 2016, fixed this problem, so working
1380      * around it is not a must, but it's easy to do, so let's do it here.
1381      *
1382      * Note: it's safe to update ISR from any thread as it was switched
1383      * to an atomic operation.
1384      */
1385     virtio_set_isr(vq->vdev, 0x1);
1386     event_notifier_set(&vq->guest_notifier);
1387 }
1388 
1389 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1390 {
1391     if (!virtio_should_notify(vdev, vq)) {
1392         return;
1393     }
1394 
1395     trace_virtio_notify(vdev, vq);
1396     virtio_set_isr(vq->vdev, 0x1);
1397     virtio_notify_vector(vdev, vq->vector);
1398 }
1399 
1400 void virtio_notify_config(VirtIODevice *vdev)
1401 {
1402     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1403         return;
    }
1404 
1405     virtio_set_isr(vdev, 0x3);
1406     vdev->generation++;
1407     virtio_notify_vector(vdev, vdev->config_vector);
1408 }
1409 
1410 static bool virtio_device_endian_needed(void *opaque)
1411 {
1412     VirtIODevice *vdev = opaque;
1413 
1414     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1415     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1416         return vdev->device_endian != virtio_default_endian();
1417     }
1418     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1419     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1420 }
1421 
1422 static bool virtio_64bit_features_needed(void *opaque)
1423 {
1424     VirtIODevice *vdev = opaque;
1425 
1426     return (vdev->host_features >> 32) != 0;
1427 }
1428 
1429 static bool virtio_virtqueue_needed(void *opaque)
1430 {
1431     VirtIODevice *vdev = opaque;
1432 
1433     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1434 }
1435 
1436 static bool virtio_ringsize_needed(void *opaque)
1437 {
1438     VirtIODevice *vdev = opaque;
1439     int i;
1440 
1441     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1442         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1443             return true;
1444         }
1445     }
1446     return false;
1447 }
1448 
1449 static bool virtio_extra_state_needed(void *opaque)
1450 {
1451     VirtIODevice *vdev = opaque;
1452     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1453     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1454 
1455     return k->has_extra_state &&
1456         k->has_extra_state(qbus->parent);
1457 }
1458 
1459 static bool virtio_broken_needed(void *opaque)
1460 {
1461     VirtIODevice *vdev = opaque;
1462 
1463     return vdev->broken;
1464 }
1465 
1466 static const VMStateDescription vmstate_virtqueue = {
1467     .name = "virtqueue_state",
1468     .version_id = 1,
1469     .minimum_version_id = 1,
1470     .fields = (VMStateField[]) {
1471         VMSTATE_UINT64(vring.avail, struct VirtQueue),
1472         VMSTATE_UINT64(vring.used, struct VirtQueue),
1473         VMSTATE_END_OF_LIST()
1474     }
1475 };
1476 
1477 static const VMStateDescription vmstate_virtio_virtqueues = {
1478     .name = "virtio/virtqueues",
1479     .version_id = 1,
1480     .minimum_version_id = 1,
1481     .needed = &virtio_virtqueue_needed,
1482     .fields = (VMStateField[]) {
1483         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1484                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1485         VMSTATE_END_OF_LIST()
1486     }
1487 };
1488 
1489 static const VMStateDescription vmstate_ringsize = {
1490     .name = "ringsize_state",
1491     .version_id = 1,
1492     .minimum_version_id = 1,
1493     .fields = (VMStateField[]) {
1494         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1495         VMSTATE_END_OF_LIST()
1496     }
1497 };
1498 
1499 static const VMStateDescription vmstate_virtio_ringsize = {
1500     .name = "virtio/ringsize",
1501     .version_id = 1,
1502     .minimum_version_id = 1,
1503     .needed = &virtio_ringsize_needed,
1504     .fields = (VMStateField[]) {
1505         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1506                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1507         VMSTATE_END_OF_LIST()
1508     }
1509 };
1510 
1511 static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1512 {
1513     VirtIODevice *vdev = pv;
1514     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1515     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1516 
1517     if (!k->load_extra_state) {
1518         return -1;
1519     } else {
1520         return k->load_extra_state(qbus->parent, f);
1521     }
1522 }
1523 
1524 static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1525 {
1526     VirtIODevice *vdev = pv;
1527     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1528     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1529 
1530     k->save_extra_state(qbus->parent, f);
1531 }
1532 
1533 static const VMStateInfo vmstate_info_extra_state = {
1534     .name = "virtqueue_extra_state",
1535     .get = get_extra_state,
1536     .put = put_extra_state,
1537 };
1538 
1539 static const VMStateDescription vmstate_virtio_extra_state = {
1540     .name = "virtio/extra_state",
1541     .version_id = 1,
1542     .minimum_version_id = 1,
1543     .needed = &virtio_extra_state_needed,
1544     .fields = (VMStateField[]) {
1545         {
1546             .name         = "extra_state",
1547             .version_id   = 0,
1548             .field_exists = NULL,
1549             .size         = 0,
1550             .info         = &vmstate_info_extra_state,
1551             .flags        = VMS_SINGLE,
1552             .offset       = 0,
1553         },
1554         VMSTATE_END_OF_LIST()
1555     }
1556 };
1557 
1558 static const VMStateDescription vmstate_virtio_device_endian = {
1559     .name = "virtio/device_endian",
1560     .version_id = 1,
1561     .minimum_version_id = 1,
1562     .needed = &virtio_device_endian_needed,
1563     .fields = (VMStateField[]) {
1564         VMSTATE_UINT8(device_endian, VirtIODevice),
1565         VMSTATE_END_OF_LIST()
1566     }
1567 };
1568 
1569 static const VMStateDescription vmstate_virtio_64bit_features = {
1570     .name = "virtio/64bit_features",
1571     .version_id = 1,
1572     .minimum_version_id = 1,
1573     .needed = &virtio_64bit_features_needed,
1574     .fields = (VMStateField[]) {
1575         VMSTATE_UINT64(guest_features, VirtIODevice),
1576         VMSTATE_END_OF_LIST()
1577     }
1578 };
1579 
1580 static const VMStateDescription vmstate_virtio_broken = {
1581     .name = "virtio/broken",
1582     .version_id = 1,
1583     .minimum_version_id = 1,
1584     .needed = &virtio_broken_needed,
1585     .fields = (VMStateField[]) {
1586         VMSTATE_BOOL(broken, VirtIODevice),
1587         VMSTATE_END_OF_LIST()
1588     }
1589 };
1590 
1591 static const VMStateDescription vmstate_virtio = {
1592     .name = "virtio",
1593     .version_id = 1,
1594     .minimum_version_id = 1,
1595     .minimum_version_id_old = 1,
1596     .fields = (VMStateField[]) {
1597         VMSTATE_END_OF_LIST()
1598     },
1599     .subsections = (const VMStateDescription*[]) {
1600         &vmstate_virtio_device_endian,
1601         &vmstate_virtio_64bit_features,
1602         &vmstate_virtio_virtqueues,
1603         &vmstate_virtio_ringsize,
1604         &vmstate_virtio_broken,
1605         &vmstate_virtio_extra_state,
1606         NULL
1607     }
1608 };
1609 
1610 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1611 {
1612     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1613     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1614     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1615     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1616     int i;
1617 
1618     if (k->save_config) {
1619         k->save_config(qbus->parent, f);
1620     }
1621 
1622     qemu_put_8s(f, &vdev->status);
1623     qemu_put_8s(f, &vdev->isr);
1624     qemu_put_be16s(f, &vdev->queue_sel);
1625     qemu_put_be32s(f, &guest_features_lo);
1626     qemu_put_be32(f, vdev->config_len);
1627     qemu_put_buffer(f, vdev->config, vdev->config_len);
1628 
1629     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1630         if (vdev->vq[i].vring.num == 0) {
1631             break;
        }
1632     }
1633 
1634     qemu_put_be32(f, i);
1635 
1636     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1637         if (vdev->vq[i].vring.num == 0) {
1638             break;
        }
1639 
1640         qemu_put_be32(f, vdev->vq[i].vring.num);
1641         if (k->has_variable_vring_alignment) {
1642             qemu_put_be32(f, vdev->vq[i].vring.align);
1643         }
1644         /* XXX virtio-1 devices */
1645         qemu_put_be64(f, vdev->vq[i].vring.desc);
1646         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1647         if (k->save_queue) {
1648             k->save_queue(qbus->parent, i, f);
1649         }
1650     }
1651 
1652     if (vdc->save != NULL) {
1653         vdc->save(vdev, f);
1654     }
1655 
1656     if (vdc->vmsd) {
1657         vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1658     }
1659 
1660     /* Subsections */
1661     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1662 }
1663 
1664 /* A wrapper for use as a VMState .put function */
1665 static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
1666 {
1667     virtio_save(VIRTIO_DEVICE(opaque), f);
1668 }
1669 
1670 /* A wrapper for use as a VMState .get function */
1671 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
1672 {
1673     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1674     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1675 
1676     return virtio_load(vdev, f, dc->vmsd->version_id);
1677 }
1678 
1679 const VMStateInfo  virtio_vmstate_info = {
1680     .name = "virtio",
1681     .get = virtio_device_get,
1682     .put = virtio_device_put,
1683 };
1684 
1685 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1686 {
1687     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1688     bool bad = (val & ~(vdev->host_features)) != 0;
1689 
1690     val &= vdev->host_features;
1691     if (k->set_features) {
1692         k->set_features(vdev, val);
1693     }
1694     vdev->guest_features = val;
1695     return bad ? -1 : 0;
1696 }
1697 
1698 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1699 {
1700     /*
1701      * The driver must not attempt to set features after feature negotiation
1702      * has finished.
1703      */
1704     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1705         return -EINVAL;
1706     }
1707     return virtio_set_features_nocheck(vdev, val);
1708 }
1709 
1710 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1711 {
1712     int i, ret;
1713     int32_t config_len;
1714     uint32_t num;
1715     uint32_t features;
1716     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1717     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1718     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1719 
1720     /*
1721      * We poison the endianness to ensure it does not get used before
1722      * subsections have been loaded.
1723      */
1724     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1725 
1726     if (k->load_config) {
1727         ret = k->load_config(qbus->parent, f);
1728         if (ret) {
1729             return ret;
        }
1730     }
1731 
1732     qemu_get_8s(f, &vdev->status);
1733     qemu_get_8s(f, &vdev->isr);
1734     qemu_get_be16s(f, &vdev->queue_sel);
1735     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1736         return -1;
1737     }
1738     qemu_get_be32s(f, &features);
1739 
1740     /*
1741      * Temporarily set guest_features low bits - needed by
1742      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
1743      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1744      *
1745      * Note: devices should always test host features in future - don't create
1746      * new dependencies like this.
1747      */
1748     vdev->guest_features = features;
1749 
1750     config_len = qemu_get_be32(f);
1751 
1752     /*
1753      * There are cases where the incoming config can be bigger or smaller
1754      * than what we have; so load what we have space for, and skip
1755      * any excess that's in the stream.
1756      */
1757     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1758 
1759     while (config_len > vdev->config_len) {
1760         qemu_get_byte(f);
1761         config_len--;
1762     }
1763 
1764     num = qemu_get_be32(f);
1765 
1766     if (num > VIRTIO_QUEUE_MAX) {
1767         error_report("Invalid number of virtqueues: 0x%x", num);
1768         return -1;
1769     }
1770 
1771     for (i = 0; i < num; i++) {
1772         vdev->vq[i].vring.num = qemu_get_be32(f);
1773         if (k->has_variable_vring_alignment) {
1774             vdev->vq[i].vring.align = qemu_get_be32(f);
1775         }
1776         vdev->vq[i].vring.desc = qemu_get_be64(f);
1777         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1778         vdev->vq[i].signalled_used_valid = false;
1779         vdev->vq[i].notification_disabled = 0;
1780 
1781         if (vdev->vq[i].vring.desc) {
1782             /* XXX virtio-1 devices */
1783             virtio_queue_update_rings(vdev, i);
1784         } else if (vdev->vq[i].last_avail_idx) {
1785             error_report("VQ %d address 0x0 "
1786                          "inconsistent with Host index 0x%x",
1787                          i, vdev->vq[i].last_avail_idx);
1788             return -1;
1789         }
1790         if (k->load_queue) {
1791             ret = k->load_queue(qbus->parent, i, f);
1792             if (ret) {
1793                 return ret;
            }
1794         }
1795     }
1796 
1797     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1798 
1799     if (vdc->load != NULL) {
1800         ret = vdc->load(vdev, f, version_id);
1801         if (ret) {
1802             return ret;
1803         }
1804     }
1805 
1806     if (vdc->vmsd) {
1807         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1808         if (ret) {
1809             return ret;
1810         }
1811     }
1812 
1813     /* Subsections */
1814     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1815     if (ret) {
1816         return ret;
1817     }
1818 
1819     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1820         vdev->device_endian = virtio_default_endian();
1821     }
1822 
1823     if (virtio_64bit_features_needed(vdev)) {
1824         /*
1825          * Subsection load filled vdev->guest_features.  Run them through
1826          * virtio_set_features_nocheck to sanity-check them against
1827          * host_features.
1828          */
1829         uint64_t features64 = vdev->guest_features;
1830         if (virtio_set_features_nocheck(vdev, features64) < 0) {
1831             error_report("Features 0x%" PRIx64 " unsupported. "
1832                          "Allowed features: 0x%" PRIx64,
1833                          features64, vdev->host_features);
1834             return -1;
1835         }
1836     } else {
1837         if (virtio_set_features_nocheck(vdev, features) < 0) {
1838             error_report("Features 0x%x unsupported. "
1839                          "Allowed features: 0x%" PRIx64,
1840                          features, vdev->host_features);
1841             return -1;
1842         }
1843     }
1844 
1845     for (i = 0; i < num; i++) {
1846         if (vdev->vq[i].vring.desc) {
1847             uint16_t nheads;
1848             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1849             /* Check it isn't doing strange things with descriptor numbers. */
1850             if (nheads > vdev->vq[i].vring.num) {
1851                 error_report("VQ %d size 0x%x Guest index 0x%x "
1852                              "inconsistent with Host index 0x%x: delta 0x%x",
1853                              i, vdev->vq[i].vring.num,
1854                              vring_avail_idx(&vdev->vq[i]),
1855                              vdev->vq[i].last_avail_idx, nheads);
1856                 return -1;
1857             }
1858             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1859             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1860 
1861             /*
1862              * Some devices migrate VirtQueueElements that have been popped
1863              * from the avail ring but not yet returned to the used ring.
1864              */
1865             vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1866                                 vdev->vq[i].used_idx;
1867             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1868                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1869                              "used_idx 0x%x",
1870                              i, vdev->vq[i].vring.num,
1871                              vdev->vq[i].last_avail_idx,
1872                              vdev->vq[i].used_idx);
1873                 return -1;
1874             }
1875         }
1876     }
1877 
1878     return 0;
1879 }
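
/*
 * The nheads and inuse checks above rely on the ring indices being
 * free-running uint16_t counters: subtraction in modulo-2^16 arithmetic
 * yields the correct distance even across wraparound.  A worked example
 * (values are illustrative):
 *
 *     uint16_t avail_idx = 0x0002, last_avail_idx = 0xfffe;
 *     uint16_t nheads = avail_idx - last_avail_idx;   // 0x0004
 *
 * Any delta larger than vring.num can only come from a corrupt (or
 * hostile) migration stream, hence the hard -1 failures.
 */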
1880 
1881 void virtio_cleanup(VirtIODevice *vdev)
1882 {
1883     qemu_del_vm_change_state_handler(vdev->vmstate);
1884     g_free(vdev->config);
1885     g_free(vdev->vq);
1886     g_free(vdev->vector_queues);
1887 }
1888 
1889 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1890 {
1891     VirtIODevice *vdev = opaque;
1892     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1893     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1894     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1895     vdev->vm_running = running;
1896 
1897     if (backend_run) {
1898         virtio_set_status(vdev, vdev->status);
1899     }
1900 
1901     if (k->vmstate_change) {
1902         k->vmstate_change(qbus->parent, backend_run);
1903     }
1904 
1905     if (!backend_run) {
1906         virtio_set_status(vdev, vdev->status);
1907     }
1908 }
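
/*
 * Note the ordering above: when the VM resumes, the device status is
 * re-applied before the transport's vmstate_change hook runs, so a
 * backend (vhost, for instance) is started against fully restored state;
 * when the VM stops, the hook runs first so the backend quiesces before
 * the device-level stop.
 */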
1909 
1910 void virtio_instance_init_common(Object *proxy_obj, void *data,
1911                                  size_t vdev_size, const char *vdev_name)
1912 {
1913     DeviceState *vdev = data;
1914 
1915     object_initialize(vdev, vdev_size, vdev_name);
1916     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1917     object_unref(OBJECT(vdev));
1918     qdev_alias_all_properties(vdev, proxy_obj);
1919 }
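
/*
 * A minimal usage sketch, assuming a hypothetical proxy type that embeds
 * its backend VirtIODevice the way the virtio-*-pci proxies do:
 *
 *     static void foo_pci_instance_init(Object *obj)
 *     {
 *         FooPCI *dev = FOO_PCI(obj);              // hypothetical proxy
 *         virtio_instance_init_common(obj, &dev->vdev,
 *                                     sizeof(dev->vdev), TYPE_VIRTIO_FOO);
 *     }
 *
 * The object_unref() above drops the reference object_initialize() gave
 * us; the child property added to proxy_obj keeps the backend alive.
 */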
1920 
1921 void virtio_init(VirtIODevice *vdev, const char *name,
1922                  uint16_t device_id, size_t config_size)
1923 {
1924     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1925     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1926     int i;
1927     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1928 
1929     if (nvectors) {
1930         vdev->vector_queues =
1931             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1932     }
1933 
1934     vdev->device_id = device_id;
1935     vdev->status = 0;
1936     atomic_set(&vdev->isr, 0);
1937     vdev->queue_sel = 0;
1938     vdev->config_vector = VIRTIO_NO_VECTOR;
1939     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1940     vdev->vm_running = runstate_is_running();
1941     vdev->broken = false;
1942     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1943         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1944         vdev->vq[i].vdev = vdev;
1945         vdev->vq[i].queue_index = i;
1946     }
1947 
1948     vdev->name = name;
1949     vdev->config_len = config_size;
1950     if (vdev->config_len) {
1951         vdev->config = g_malloc0(config_size);
1952     } else {
1953         vdev->config = NULL;
1954     }
1955     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1956                                                      vdev);
1957     vdev->device_endian = virtio_default_endian();
1958     vdev->use_guest_notifier_mask = true;
1959 }
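
/*
 * A typical call from a device's realize hook, sketched here with
 * virtio-blk-style arguments (the exact name, device ID and config size
 * are the device's choice):
 *
 *     virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
 *                 sizeof(struct virtio_blk_config));
 *
 * The device then registers its virtqueues with virtio_add_queue()
 * before the transport plugs it in.
 */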
1960 
1961 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1962 {
1963     return vdev->vq[n].vring.desc;
1964 }
1965 
1966 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1967 {
1968     return vdev->vq[n].vring.avail;
1969 }
1970 
1971 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1972 {
1973     return vdev->vq[n].vring.used;
1974 }
1975 
1976 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1977 {
1978     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1979 }
1980 
1981 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1982 {
1983     return offsetof(VRingAvail, ring) +
1984         sizeof(uint16_t) * vdev->vq[n].vring.num;
1985 }
1986 
1987 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1988 {
1989     return offsetof(VRingUsed, ring) +
1990         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1991 }
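
/*
 * Worked example for the three size helpers above with num = 256
 * (classic split-ring layout; the trailing event-idx u16 is not
 * included by these helpers):
 *
 *     desc:  sizeof(VRingDesc) * 256         = 16 * 256 = 4096 bytes
 *     avail: 4 + sizeof(uint16_t) * 256      = 4 + 512  =  516 bytes
 *     used:  4 + sizeof(VRingUsedElem) * 256 = 4 + 2048 = 2052 bytes
 */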
1992 
1993 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1994 {
1995     return vdev->vq[n].last_avail_idx;
1996 }
1997 
1998 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1999 {
2000     vdev->vq[n].last_avail_idx = idx;
2001     vdev->vq[n].shadow_avail_idx = idx;
2002 }
2003 
2004 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
2005 {
2006     vdev->vq[n].signalled_used_valid = false;
2007 }
2008 
2009 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2010 {
2011     return vdev->vq + n;
2012 }
2013 
2014 uint16_t virtio_get_queue_index(VirtQueue *vq)
2015 {
2016     return vq->queue_index;
2017 }
2018 
2019 static void virtio_queue_guest_notifier_read(EventNotifier *n)
2020 {
2021     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2022     if (event_notifier_test_and_clear(n)) {
2023         virtio_notify_vector(vq->vdev, vq->vector);
2024     }
2025 }
2026 
2027 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2028                                                 bool with_irqfd)
2029 {
2030     if (assign && !with_irqfd) {
2031         event_notifier_set_handler(&vq->guest_notifier, false,
2032                                    virtio_queue_guest_notifier_read);
2033     } else {
2034         event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2035     }
2036     if (!assign) {
2037         /* Test and clear notifier before closing it,
2038          * in case the poll callback didn't have time to run. */
2039         virtio_queue_guest_notifier_read(&vq->guest_notifier);
2040     }
2041 }
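
/*
 * When with_irqfd is true the guest notifier is consumed in the kernel
 * (e.g. by a KVM irqfd bound to the queue's MSI-X vector), so no
 * userspace read handler is installed; virtio_queue_guest_notifier_read()
 * is only wired up when QEMU itself must turn the eventfd into an
 * interrupt.
 */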
2042 
2043 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2044 {
2045     return &vq->guest_notifier;
2046 }
2047 
2048 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2049 {
2050     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2051     if (event_notifier_test_and_clear(n)) {
2052         virtio_queue_notify_aio_vq(vq);
2053     }
2054 }
2055 
2056 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
2057 {
2058     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2059 
2060     virtio_queue_set_notification(vq, 0);
2061 }
2062 
2063 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
2064 {
2065     EventNotifier *n = opaque;
2066     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2067 
2068     if (virtio_queue_empty(vq)) {
2069         return false;
2070     }
2071 
2072     virtio_queue_notify_aio_vq(vq);
2073     return true;
2074 }
2075 
2076 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
2077 {
2078     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2079 
2080     /* Caller polls once more after this to catch requests that race with us */
2081     virtio_queue_set_notification(vq, 1);
2082 }
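
/*
 * Together, the three hooks above implement adaptive polling: poll_begin
 * suppresses guest->host notifications while the AioContext busy-polls,
 * the poll callback processes the ring directly whenever it is non-empty,
 * and poll_end re-enables notifications before the event loop sleeps.
 * The caller's extra poll after poll_end closes the race with a guest
 * that published a buffer just before notifications were switched back on.
 */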
2083 
2084 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2085                                                 VirtIOHandleOutput handle_output)
2086 {
2087     if (handle_output) {
2088         vq->handle_aio_output = handle_output;
2089         aio_set_event_notifier(ctx, &vq->host_notifier, true,
2090                                virtio_queue_host_notifier_aio_read,
2091                                virtio_queue_host_notifier_aio_poll);
2092         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
2093                                     virtio_queue_host_notifier_aio_poll_begin,
2094                                     virtio_queue_host_notifier_aio_poll_end);
2095     } else {
2096         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2097         /* Test and clear notifier after disabling the event,
2098          * in case the poll callback didn't have time to run. */
2099         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2100         vq->handle_aio_output = NULL;
2101     }
2102 }
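
/*
 * A dataplane-style usage sketch (the handler name is illustrative):
 * attach the queue to an IOThread's AioContext, and later detach it by
 * passing a NULL handler:
 *
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, handle_request);
 *     ...
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, NULL);
 *
 * The detach path drains one last pending notification so that a kick
 * raised just before teardown is not lost.
 */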
2103 
2104 void virtio_queue_host_notifier_read(EventNotifier *n)
2105 {
2106     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2107     if (event_notifier_test_and_clear(n)) {
2108         virtio_queue_notify_vq(vq);
2109     }
2110 }
2111 
2112 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2113 {
2114     return &vq->host_notifier;
2115 }
2116 
2117 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2118 {
2119     g_free(vdev->bus_name);
2120     vdev->bus_name = g_strdup(bus_name);
2121 }
2122 
2123 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2124 {
2125     va_list ap;
2126 
2127     va_start(ap, fmt);
2128     error_vreport(fmt, ap);
2129     va_end(ap);
2130 
2131     vdev->broken = true;
2132 
2133     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2134         virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2135         virtio_notify_config(vdev);
2136     }
2137 }
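
/*
 * A minimal usage sketch from a device model (the condition and message
 * are illustrative):
 *
 *     if (desc.len > s->max_seg_len) {
 *         virtio_error(vdev, "descriptor length %u exceeds %u",
 *                      desc.len, s->max_seg_len);
 *         return;
 *     }
 *
 * For VIRTIO_F_VERSION_1 devices this raises NEEDS_RESET and a config
 * interrupt; legacy devices are merely marked broken and go quiet.
 */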
2138 
2139 static void virtio_device_realize(DeviceState *dev, Error **errp)
2140 {
2141     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2142     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2143     Error *err = NULL;
2144 
2145     /* Devices should either use vmsd or the load/save methods */
2146     assert(!vdc->vmsd || !vdc->load);
2147 
2148     if (vdc->realize != NULL) {
2149         vdc->realize(dev, &err);
2150         if (err != NULL) {
2151             error_propagate(errp, err);
2152             return;
2153         }
2154     }
2155 
2156     virtio_bus_device_plugged(vdev, &err);
2157     if (err != NULL) {
2158         error_propagate(errp, err);
2159         return;
2160     }
2161 }
2162 
2163 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2164 {
2165     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2166     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2167     Error *err = NULL;
2168 
2169     virtio_bus_device_unplugged(vdev);
2170 
2171     if (vdc->unrealize != NULL) {
2172         vdc->unrealize(dev, &err);
2173         if (err != NULL) {
2174             error_propagate(errp, err);
2175             return;
2176         }
2177     }
2178 
2179     g_free(vdev->bus_name);
2180     vdev->bus_name = NULL;
2181 }
2182 
2183 static Property virtio_properties[] = {
2184     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2185     DEFINE_PROP_END_OF_LIST(),
2186 };
2187 
2188 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2189 {
2190     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2191     int n, r, err;
2192 
2193     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2194         VirtQueue *vq = &vdev->vq[n];
2195         if (!virtio_queue_get_num(vdev, n)) {
2196             continue;
2197         }
2198         r = virtio_bus_set_host_notifier(qbus, n, true);
2199         if (r < 0) {
2200             err = r;
2201             goto assign_error;
2202         }
2203         event_notifier_set_handler(&vq->host_notifier, true,
2204                                    virtio_queue_host_notifier_read);
2205     }
2206 
2207     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2208         /* Kick right away to begin processing requests already in vring */
2209         VirtQueue *vq = &vdev->vq[n];
2210         if (!vq->vring.num) {
2211             continue;
2212         }
2213         event_notifier_set(&vq->host_notifier);
2214     }
2215     return 0;
2216 
2217 assign_error:
2218     while (--n >= 0) {
2219         VirtQueue *vq = &vdev->vq[n];
2220         if (!virtio_queue_get_num(vdev, n)) {
2221             continue;
2222         }
2223 
2224         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2225         r = virtio_bus_set_host_notifier(qbus, n, false);
2226         assert(r >= 0);
2227     }
2228     return err;
2229 }
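
/*
 * The assign_error path above unwinds only the queues set up so far,
 * i.e. [0, n): each handler is removed before its host notifier is
 * released, mirroring the setup order.  Deassigning a notifier is not
 * expected to fail, hence the assert rather than error propagation.
 */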
2230 
2231 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2232 {
2233     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2234     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2235 
2236     return virtio_bus_start_ioeventfd(vbus);
2237 }
2238 
2239 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2240 {
2241     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2242     int n, r;
2243 
2244     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2245         VirtQueue *vq = &vdev->vq[n];
2246 
2247         if (!virtio_queue_get_num(vdev, n)) {
2248             continue;
2249         }
2250         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2251         r = virtio_bus_set_host_notifier(qbus, n, false);
2252         assert(r >= 0);
2253     }
2254 }
2255 
2256 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2257 {
2258     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2259     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2260 
2261     virtio_bus_stop_ioeventfd(vbus);
2262 }
2263 
2264 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2265 {
2266     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2267     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2268 
2269     return virtio_bus_grab_ioeventfd(vbus);
2270 }
2271 
2272 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2273 {
2274     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2275     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2276 
2277     virtio_bus_release_ioeventfd(vbus);
2278 }
2279 
2280 static void virtio_device_class_init(ObjectClass *klass, void *data)
2281 {
2282     /* Set the default values for all virtio devices here. */
2283     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2284     DeviceClass *dc = DEVICE_CLASS(klass);
2285 
2286     dc->realize = virtio_device_realize;
2287     dc->unrealize = virtio_device_unrealize;
2288     dc->bus_type = TYPE_VIRTIO_BUS;
2289     dc->props = virtio_properties;
2290     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2291     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2292 
2293     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2294 }
2295 
2296 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2297 {
2298     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2299     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2300 
2301     return virtio_bus_ioeventfd_enabled(vbus);
2302 }
2303 
2304 static const TypeInfo virtio_device_info = {
2305     .name = TYPE_VIRTIO_DEVICE,
2306     .parent = TYPE_DEVICE,
2307     .instance_size = sizeof(VirtIODevice),
2308     .class_init = virtio_device_class_init,
2309     .abstract = true,
2310     .class_size = sizeof(VirtioDeviceClass),
2311 };
2312 
2313 static void virtio_register_types(void)
2314 {
2315     type_register_static(&virtio_device_info);
2316 }
2317 
2318 type_init(virtio_register_types)
2319