xref: /openbmc/qemu/hw/virtio/virtio.c (revision f6a51c84)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "cpu.h"
18 #include "trace.h"
19 #include "exec/address-spaces.h"
20 #include "qemu/error-report.h"
21 #include "hw/virtio/virtio.h"
22 #include "qemu/atomic.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "migration/migration.h"
25 #include "hw/virtio/virtio-access.h"
26 
27 /*
28  * The alignment to use between consumer and producer parts of vring.
29  * x86 pagesize again. This is the default, used by transports like PCI
30  * which don't provide a means for the guest to tell the host the alignment.
31  */
32 #define VIRTIO_PCI_VRING_ALIGN         4096
33 
34 typedef struct VRingDesc
35 {
36     uint64_t addr;
37     uint32_t len;
38     uint16_t flags;
39     uint16_t next;
40 } VRingDesc;
41 
42 typedef struct VRingAvail
43 {
44     uint16_t flags;
45     uint16_t idx;
46     uint16_t ring[0];
47 } VRingAvail;
48 
49 typedef struct VRingUsedElem
50 {
51     uint32_t id;
52     uint32_t len;
53 } VRingUsedElem;
54 
55 typedef struct VRingUsed
56 {
57     uint16_t flags;
58     uint16_t idx;
59     VRingUsedElem ring[0];
60 } VRingUsed;
61 
62 typedef struct VRing
63 {
64     unsigned int num;
65     unsigned int num_default;
66     unsigned int align;
67     hwaddr desc;
68     hwaddr avail;
69     hwaddr used;
70 } VRing;
71 
72 struct VirtQueue
73 {
74     VRing vring;
75 
76     /* Next head to pop */
77     uint16_t last_avail_idx;
78 
79     /* Last avail_idx read from VQ. */
80     uint16_t shadow_avail_idx;
81 
82     uint16_t used_idx;
83 
84     /* Last used index value we have signalled on */
85     uint16_t signalled_used;
86 
87     /* Whether signalled_used is valid */
88     bool signalled_used_valid;
89 
90     /* Notification enabled? */
91     bool notification;
92 
93     uint16_t queue_index;
94 
95     int inuse;
96 
97     uint16_t vector;
98     VirtIOHandleOutput handle_output;
99     VirtIOHandleOutput handle_aio_output;
100     VirtIODevice *vdev;
101     EventNotifier guest_notifier;
102     EventNotifier host_notifier;
103     QLIST_ENTRY(VirtQueue) node;
104 };
105 
106 /* virt queue functions */
107 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
108 {
109     VRing *vring = &vdev->vq[n].vring;
110 
111     if (!vring->desc) {
112         /* not yet setup -> nothing to do */
113         return;
114     }
115     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
116     vring->used = vring_align(vring->avail +
117                               offsetof(VRingAvail, ring[vring->num]),
118                               vring->align);
119 }
120 
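/*
 * A worked example of the legacy layout computed above, assuming num = 256
 * and align = 4096 (the PCI default): the descriptor table takes
 * 256 * sizeof(VRingDesc) = 4096 bytes, so avail = desc + 0x1000; the avail
 * ring ends at avail + offsetof(VRingAvail, ring[256]) = avail + 0x204, and
 * used is that address rounded up to the next 4096-byte boundary, i.e.
 * desc + 0x2000.
 */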
121 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
122                             hwaddr desc_pa, int i)
123 {
124     address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
125                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
126     virtio_tswap64s(vdev, &desc->addr);
127     virtio_tswap32s(vdev, &desc->len);
128     virtio_tswap16s(vdev, &desc->flags);
129     virtio_tswap16s(vdev, &desc->next);
130 }
131 
132 static inline uint16_t vring_avail_flags(VirtQueue *vq)
133 {
134     hwaddr pa;
135     pa = vq->vring.avail + offsetof(VRingAvail, flags);
136     return virtio_lduw_phys(vq->vdev, pa);
137 }
138 
139 static inline uint16_t vring_avail_idx(VirtQueue *vq)
140 {
141     hwaddr pa;
142     pa = vq->vring.avail + offsetof(VRingAvail, idx);
143     vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
144     return vq->shadow_avail_idx;
145 }
146 
147 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
148 {
149     hwaddr pa;
150     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
151     return virtio_lduw_phys(vq->vdev, pa);
152 }
153 
154 static inline uint16_t vring_get_used_event(VirtQueue *vq)
155 {
156     return vring_avail_ring(vq, vq->vring.num);
157 }
158 
159 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
160                                     int i)
161 {
162     hwaddr pa;
163     virtio_tswap32s(vq->vdev, &uelem->id);
164     virtio_tswap32s(vq->vdev, &uelem->len);
165     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
166     address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
167                        (void *)uelem, sizeof(VRingUsedElem));
168 }
169 
170 static uint16_t vring_used_idx(VirtQueue *vq)
171 {
172     hwaddr pa;
173     pa = vq->vring.used + offsetof(VRingUsed, idx);
174     return virtio_lduw_phys(vq->vdev, pa);
175 }
176 
177 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
178 {
179     hwaddr pa;
180     pa = vq->vring.used + offsetof(VRingUsed, idx);
181     virtio_stw_phys(vq->vdev, pa, val);
182     vq->used_idx = val;
183 }
184 
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
186 {
187     VirtIODevice *vdev = vq->vdev;
188     hwaddr pa;
189     pa = vq->vring.used + offsetof(VRingUsed, flags);
190     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
191 }
192 
193 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
194 {
195     VirtIODevice *vdev = vq->vdev;
196     hwaddr pa;
197     pa = vq->vring.used + offsetof(VRingUsed, flags);
198     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
199 }
200 
201 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
202 {
203     hwaddr pa;
204     if (!vq->notification) {
205         return;
206     }
207     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
208     virtio_stw_phys(vq->vdev, pa, val);
209 }
210 
211 void virtio_queue_set_notification(VirtQueue *vq, int enable)
212 {
213     vq->notification = enable;
214     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
215         vring_set_avail_event(vq, vring_avail_idx(vq));
216     } else if (enable) {
217         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
218     } else {
219         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
220     }
221     if (enable) {
222         /* Expose avail event/used flags before caller checks the avail idx. */
223         smp_mb();
224     }
225 }
226 
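/*
 * A minimal sketch (not code from this file) of the usual pattern built on
 * virtio_queue_set_notification(): suppress guest notifications while a
 * batch is drained, then re-enable them and re-check the queue to close the
 * race with a guest that added buffers in the meantime.  A real handler
 * would fill elem->in_sg and pass the number of bytes written instead of 0.
 *
 *     static void example_drain(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         do {
 *             virtio_queue_set_notification(vq, 0);
 *             while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *                 virtqueue_push(vq, elem, 0);
 *                 g_free(elem);
 *             }
 *             virtio_queue_set_notification(vq, 1);
 *         } while (!virtio_queue_empty(vq));
 *         virtio_notify(vdev, vq);
 *     }
 */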
227 int virtio_queue_ready(VirtQueue *vq)
228 {
229     return vq->vring.avail != 0;
230 }
231 
232 /* Fetch avail_idx from VQ memory only when we really need to know whether
233  * the guest has added some buffers. */
234 int virtio_queue_empty(VirtQueue *vq)
235 {
236     if (vq->shadow_avail_idx != vq->last_avail_idx) {
237         return 0;
238     }
239 
240     return vring_avail_idx(vq) == vq->last_avail_idx;
241 }
242 
243 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
244                                unsigned int len)
245 {
246     unsigned int offset;
247     int i;
248 
249     offset = 0;
250     for (i = 0; i < elem->in_num; i++) {
251         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
252 
253         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
254                                   elem->in_sg[i].iov_len,
255                                   1, size);
256 
257         offset += size;
258     }
259 
260     for (i = 0; i < elem->out_num; i++)
261         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
262                                   elem->out_sg[i].iov_len,
263                                   0, elem->out_sg[i].iov_len);
264 }
265 
266 /* virtqueue_detach_element:
267  * @vq: The #VirtQueue
268  * @elem: The #VirtQueueElement
269  * @len: number of bytes written
270  *
271  * Detach the element from the virtqueue.  This function is suitable for device
272  * reset or other situations where a #VirtQueueElement is simply freed and will
273  * not be pushed or discarded.
274  */
275 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
276                               unsigned int len)
277 {
278     vq->inuse--;
279     virtqueue_unmap_sg(vq, elem, len);
280 }
281 
282 /* virtqueue_unpop:
283  * @vq: The #VirtQueue
284  * @elem: The #VirtQueueElement
285  * @len: number of bytes written
286  *
287  * Pretend the most recent element wasn't popped from the virtqueue.  The next
288  * call to virtqueue_pop() will refetch the element.
289  */
290 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
291                      unsigned int len)
292 {
293     vq->last_avail_idx--;
294     virtqueue_detach_element(vq, elem, len);
295 }
296 
297 /* virtqueue_rewind:
298  * @vq: The #VirtQueue
299  * @num: Number of elements to push back
300  *
301  * Pretend that elements weren't popped from the virtqueue.  The next
302  * virtqueue_pop() will refetch the oldest element.
303  *
304  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
305  *
306  * Returns: true on success, false if @num is greater than the number of in use
307  * elements.
308  */
309 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
310 {
311     if (num > vq->inuse) {
312         return false;
313     }
314     vq->last_avail_idx -= num;
315     vq->inuse -= num;
316     return true;
317 }
318 
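/*
 * A sketch (not from this file) of how virtqueue_unpop() is typically used:
 * a device that pops an element but cannot process it yet, for example
 * because a hypothetical backend_full condition holds, hands it back so the
 * next virtqueue_pop() fetches it again.  Nothing was written to the guest
 * buffers, so len is 0; the element itself still has to be freed.
 *
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem && backend_full) {
 *         virtqueue_unpop(vq, elem, 0);
 *         g_free(elem);
 *         return;
 *     }
 *
 * virtqueue_rewind() does the same for a whole batch when only the count of
 * outstanding elements is known, not the elements themselves.
 */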
319 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
320                     unsigned int len, unsigned int idx)
321 {
322     VRingUsedElem uelem;
323 
324     trace_virtqueue_fill(vq, elem, len, idx);
325 
326     virtqueue_unmap_sg(vq, elem, len);
327 
328     if (unlikely(vq->vdev->broken)) {
329         return;
330     }
331 
332     idx = (idx + vq->used_idx) % vq->vring.num;
333 
334     uelem.id = elem->index;
335     uelem.len = len;
336     vring_used_write(vq, &uelem, idx);
337 }
338 
339 void virtqueue_flush(VirtQueue *vq, unsigned int count)
340 {
341     uint16_t old, new;
342 
343     if (unlikely(vq->vdev->broken)) {
344         vq->inuse -= count;
345         return;
346     }
347 
348     /* Make sure buffer is written before we update index. */
349     smp_wmb();
350     trace_virtqueue_flush(vq, count);
351     old = vq->used_idx;
352     new = old + count;
353     vring_used_idx_set(vq, new);
354     vq->inuse -= count;
355     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
356         vq->signalled_used_valid = false;
357 }
358 
359 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
360                     unsigned int len)
361 {
362     virtqueue_fill(vq, elem, len, 0);
363     virtqueue_flush(vq, 1);
364 }
365 
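/*
 * A sketch of the batched completion pattern: virtqueue_push() above handles
 * the common single-element case, while a device finishing several elements
 * at once can call virtqueue_fill() with increasing idx values and publish
 * them with a single virtqueue_flush(), so the guest observes one used-index
 * update.  elems[], lens[] and n are hypothetical bookkeeping.
 *
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 *     virtio_notify(vdev, vq);
 */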
366 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
367 {
368     uint16_t num_heads = vring_avail_idx(vq) - idx;
369 
370     /* Check it isn't doing very strange things with descriptor numbers. */
371     if (num_heads > vq->vring.num) {
372         virtio_error(vq->vdev, "Guest moved avail index from %u to %u",
373                      idx, vq->shadow_avail_idx);
374         return -EINVAL;
375     }
376     /* On success, callers read a descriptor at vq->last_avail_idx.
377      * Make sure descriptor read does not bypass avail index read. */
378     if (num_heads) {
379         smp_rmb();
380     }
381 
382     return num_heads;
383 }
384 
385 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
386                                unsigned int *head)
387 {
388     /* Grab the next descriptor number they're advertising, and increment
389      * the index we've seen. */
390     *head = vring_avail_ring(vq, idx % vq->vring.num);
391 
392     /* If their number is silly, that's a fatal mistake. */
393     if (*head >= vq->vring.num) {
394         virtio_error(vq->vdev, "Guest says index %u is available", *head);
395         return false;
396     }
397 
398     return true;
399 }
400 
401 enum {
402     VIRTQUEUE_READ_DESC_ERROR = -1,
403     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
404     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
405 };
406 
407 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
408                                     hwaddr desc_pa, unsigned int max,
409                                     unsigned int *next)
410 {
411     /* If this descriptor says it doesn't chain, we're done. */
412     if (!(desc->flags & VRING_DESC_F_NEXT)) {
413         return VIRTQUEUE_READ_DESC_DONE;
414     }
415 
416     /* Check they're not leading us off the end of the descriptor table. */
417     *next = desc->next;
418     /* Make sure compiler knows to grab that: we don't want it changing! */
419     smp_wmb();
420 
421     if (*next >= max) {
422         virtio_error(vdev, "Desc next is %u", *next);
423         return VIRTQUEUE_READ_DESC_ERROR;
424     }
425 
426     vring_desc_read(vdev, desc, desc_pa, *next);
427     return VIRTQUEUE_READ_DESC_MORE;
428 }
429 
430 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
431                                unsigned int *out_bytes,
432                                unsigned max_in_bytes, unsigned max_out_bytes)
433 {
434     unsigned int idx;
435     unsigned int total_bufs, in_total, out_total;
436     int rc;
437 
438     idx = vq->last_avail_idx;
439 
440     total_bufs = in_total = out_total = 0;
441     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
442         VirtIODevice *vdev = vq->vdev;
443         unsigned int max, num_bufs, indirect = 0;
444         VRingDesc desc;
445         hwaddr desc_pa;
446         unsigned int i;
447 
448         max = vq->vring.num;
449         num_bufs = total_bufs;
450 
451         if (!virtqueue_get_head(vq, idx++, &i)) {
452             goto err;
453         }
454 
455         desc_pa = vq->vring.desc;
456         vring_desc_read(vdev, &desc, desc_pa, i);
457 
458         if (desc.flags & VRING_DESC_F_INDIRECT) {
459             if (desc.len % sizeof(VRingDesc)) {
460                 virtio_error(vdev, "Invalid size for indirect buffer table");
461                 goto err;
462             }
463 
464             /* If we've got too many, that implies a descriptor loop. */
465             if (num_bufs >= max) {
466                 virtio_error(vdev, "Looped descriptor");
467                 goto err;
468             }
469 
470             /* loop over the indirect descriptor table */
471             indirect = 1;
472             max = desc.len / sizeof(VRingDesc);
473             desc_pa = desc.addr;
474             num_bufs = i = 0;
475             vring_desc_read(vdev, &desc, desc_pa, i);
476         }
477 
478         do {
479             /* If we've got too many, that implies a descriptor loop. */
480             if (++num_bufs > max) {
481                 virtio_error(vdev, "Looped descriptor");
482                 goto err;
483             }
484 
485             if (desc.flags & VRING_DESC_F_WRITE) {
486                 in_total += desc.len;
487             } else {
488                 out_total += desc.len;
489             }
490             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
491                 goto done;
492             }
493 
494             rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
495         } while (rc == VIRTQUEUE_READ_DESC_MORE);
496 
497         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
498             goto err;
499         }
500 
501         if (!indirect)
502             total_bufs = num_bufs;
503         else
504             total_bufs++;
505     }
506 
507     if (rc < 0) {
508         goto err;
509     }
510 
511 done:
512     if (in_bytes) {
513         *in_bytes = in_total;
514     }
515     if (out_bytes) {
516         *out_bytes = out_total;
517     }
518     return;
519 
520 err:
521     in_total = out_total = 0;
522     goto done;
523 }
524 
525 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
526                           unsigned int out_bytes)
527 {
528     unsigned int in_total, out_total;
529 
530     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
531     return in_bytes <= in_total && out_bytes <= out_total;
532 }
533 
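/*
 * A sketch of how a device can use virtqueue_avail_bytes() to wait for the
 * guest to provide enough device-writable ("in") space before popping
 * anything; RESPONSE_SIZE is a hypothetical constant and 0 means no
 * device-readable ("out") bytes are required.
 *
 *     if (!virtqueue_avail_bytes(vq, RESPONSE_SIZE, 0)) {
 *         return;
 *     }
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 */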
534 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
535                                hwaddr *addr, struct iovec *iov,
536                                unsigned int max_num_sg, bool is_write,
537                                hwaddr pa, size_t sz)
538 {
539     bool ok = false;
540     unsigned num_sg = *p_num_sg;
541     assert(num_sg <= max_num_sg);
542 
543     if (!sz) {
544         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
545         goto out;
546     }
547 
548     while (sz) {
549         hwaddr len = sz;
550 
551         if (num_sg == max_num_sg) {
552             virtio_error(vdev, "virtio: too many write descriptors in "
553                                "indirect table");
554             goto out;
555         }
556 
557         iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
558         if (!iov[num_sg].iov_base) {
559             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
560             goto out;
561         }
562 
563         iov[num_sg].iov_len = len;
564         addr[num_sg] = pa;
565 
566         sz -= len;
567         pa += len;
568         num_sg++;
569     }
570     ok = true;
571 
572 out:
573     *p_num_sg = num_sg;
574     return ok;
575 }
576 
577 /* Only used by error code paths before we have a VirtQueueElement (therefore
578  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
579  * yet.
580  */
581 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
582                                     struct iovec *iov)
583 {
584     unsigned int i;
585 
586     for (i = 0; i < out_num + in_num; i++) {
587         int is_write = i >= out_num;
588 
589         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
590         iov++;
591     }
592 }
593 
594 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
595                                 unsigned int *num_sg, unsigned int max_size,
596                                 int is_write)
597 {
598     unsigned int i;
599     hwaddr len;
600 
601     /* Note: this function MUST validate input; some callers
602      * pass in num_sg values received over the network.
603      */
604     /* TODO: teach all callers that this can fail, and return failure instead
605      * of asserting here.
606      * When we do, we might be able to re-enable NDEBUG below.
607      */
608 #ifdef NDEBUG
609 #error building with NDEBUG is not supported
610 #endif
611     assert(*num_sg <= max_size);
612 
613     for (i = 0; i < *num_sg; i++) {
614         len = sg[i].iov_len;
615         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
616         if (!sg[i].iov_base) {
617             error_report("virtio: error trying to map MMIO memory");
618             exit(1);
619         }
620         if (len != sg[i].iov_len) {
621             error_report("virtio: unexpected memory split");
622             exit(1);
623         }
624     }
625 }
626 
627 void virtqueue_map(VirtQueueElement *elem)
628 {
629     virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
630                         VIRTQUEUE_MAX_SIZE, 1);
631     virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
632                         VIRTQUEUE_MAX_SIZE, 0);
633 }
634 
635 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
636 {
637     VirtQueueElement *elem;
638     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
639     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
640     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
641     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
642     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
643     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
644 
645     assert(sz >= sizeof(VirtQueueElement));
646     elem = g_malloc(out_sg_end);
647     elem->out_num = out_num;
648     elem->in_num = in_num;
649     elem->in_addr = (void *)elem + in_addr_ofs;
650     elem->out_addr = (void *)elem + out_addr_ofs;
651     elem->in_sg = (void *)elem + in_sg_ofs;
652     elem->out_sg = (void *)elem + out_sg_ofs;
653     return elem;
654 }
655 
656 void *virtqueue_pop(VirtQueue *vq, size_t sz)
657 {
658     unsigned int i, head, max;
659     hwaddr desc_pa = vq->vring.desc;
660     VirtIODevice *vdev = vq->vdev;
661     VirtQueueElement *elem;
662     unsigned out_num, in_num;
663     hwaddr addr[VIRTQUEUE_MAX_SIZE];
664     struct iovec iov[VIRTQUEUE_MAX_SIZE];
665     VRingDesc desc;
666     int rc;
667 
668     if (unlikely(vdev->broken)) {
669         return NULL;
670     }
671     if (virtio_queue_empty(vq)) {
672         return NULL;
673     }
674     /* Needed after virtio_queue_empty(), see comment in
675      * virtqueue_num_heads(). */
676     smp_rmb();
677 
678     /* When we start there are no input or output buffers. */
679     out_num = in_num = 0;
680 
681     max = vq->vring.num;
682 
683     if (vq->inuse >= vq->vring.num) {
684         virtio_error(vdev, "Virtqueue size exceeded");
685         return NULL;
686     }
687 
688     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
689         return NULL;
690     }
691 
692     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
693         vring_set_avail_event(vq, vq->last_avail_idx);
694     }
695 
696     i = head;
697     vring_desc_read(vdev, &desc, desc_pa, i);
698     if (desc.flags & VRING_DESC_F_INDIRECT) {
699         if (desc.len % sizeof(VRingDesc)) {
700             virtio_error(vdev, "Invalid size for indirect buffer table");
701             return NULL;
702         }
703 
704         /* loop over the indirect descriptor table */
705         max = desc.len / sizeof(VRingDesc);
706         desc_pa = desc.addr;
707         i = 0;
708         vring_desc_read(vdev, &desc, desc_pa, i);
709     }
710 
711     /* Collect all the descriptors */
712     do {
713         bool map_ok;
714 
715         if (desc.flags & VRING_DESC_F_WRITE) {
716             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
717                                         iov + out_num,
718                                         VIRTQUEUE_MAX_SIZE - out_num, true,
719                                         desc.addr, desc.len);
720         } else {
721             if (in_num) {
722                 virtio_error(vdev, "Incorrect order for descriptors");
723                 goto err_undo_map;
724             }
725             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
726                                         VIRTQUEUE_MAX_SIZE, false,
727                                         desc.addr, desc.len);
728         }
729         if (!map_ok) {
730             goto err_undo_map;
731         }
732 
733         /* If we've got too many, that implies a descriptor loop. */
734         if ((in_num + out_num) > max) {
735             virtio_error(vdev, "Looped descriptor");
736             goto err_undo_map;
737         }
738 
739         rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
740     } while (rc == VIRTQUEUE_READ_DESC_MORE);
741 
742     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
743         goto err_undo_map;
744     }
745 
746     /* Now copy what we have collected and mapped */
747     elem = virtqueue_alloc_element(sz, out_num, in_num);
748     elem->index = head;
749     for (i = 0; i < out_num; i++) {
750         elem->out_addr[i] = addr[i];
751         elem->out_sg[i] = iov[i];
752     }
753     for (i = 0; i < in_num; i++) {
754         elem->in_addr[i] = addr[out_num + i];
755         elem->in_sg[i] = iov[out_num + i];
756     }
757 
758     vq->inuse++;
759 
760     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
761     return elem;
762 
763 err_undo_map:
764     virtqueue_undo_map_desc(out_num, in_num, iov);
765     return NULL;
766 }
767 
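/*
 * A sketch of how the sz argument of virtqueue_pop() is commonly used: a
 * device embeds VirtQueueElement as the first field of its own request
 * structure and passes the larger size, so per-request state is allocated in
 * the same call.  VirtIOExampleReq is hypothetical.
 *
 *     typedef struct VirtIOExampleReq {
 *         VirtQueueElement elem;
 *         uint32_t status;
 *     } VirtIOExampleReq;
 *
 *     VirtIOExampleReq *req = virtqueue_pop(vq, sizeof(VirtIOExampleReq));
 *     if (req) {
 *         req->status = 0;
 *     }
 */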
768 /* Reading and writing a structure directly to QEMUFile is *awful*, but
769  * it is what QEMU has always done by mistake.  We can change it sooner
770  * or later by bumping the version number of the affected vm states.
771  * In the meanwhile, since the in-memory layout of VirtQueueElement
772  * has changed, we need to marshal to and from the layout that was
773  * used before the change.
774  */
775 typedef struct VirtQueueElementOld {
776     unsigned int index;
777     unsigned int out_num;
778     unsigned int in_num;
779     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
780     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
781     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
782     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
783 } VirtQueueElementOld;
784 
785 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
786 {
787     VirtQueueElement *elem;
788     VirtQueueElementOld data;
789     int i;
790 
791     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
792 
793     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
794     elem->index = data.index;
795 
796     for (i = 0; i < elem->in_num; i++) {
797         elem->in_addr[i] = data.in_addr[i];
798     }
799 
800     for (i = 0; i < elem->out_num; i++) {
801         elem->out_addr[i] = data.out_addr[i];
802     }
803 
804     for (i = 0; i < elem->in_num; i++) {
805         /* Base is overwritten by virtqueue_map.  */
806         elem->in_sg[i].iov_base = 0;
807         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
808     }
809 
810     for (i = 0; i < elem->out_num; i++) {
811         /* Base is overwritten by virtqueue_map.  */
812         elem->out_sg[i].iov_base = 0;
813         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
814     }
815 
816     virtqueue_map(elem);
817     return elem;
818 }
819 
820 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
821 {
822     VirtQueueElementOld data;
823     int i;
824 
825     memset(&data, 0, sizeof(data));
826     data.index = elem->index;
827     data.in_num = elem->in_num;
828     data.out_num = elem->out_num;
829 
830     for (i = 0; i < elem->in_num; i++) {
831         data.in_addr[i] = elem->in_addr[i];
832     }
833 
834     for (i = 0; i < elem->out_num; i++) {
835         data.out_addr[i] = elem->out_addr[i];
836     }
837 
838     for (i = 0; i < elem->in_num; i++) {
839         /* Base is overwritten by virtqueue_map when loading.  Do not
840          * save it, as it would leak the QEMU address space layout.  */
841         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
842     }
843 
844     for (i = 0; i < elem->out_num; i++) {
845         /* Do not save iov_base as above.  */
846         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
847     }
848     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
849 }
850 
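/*
 * A sketch of how a device migrates in-flight requests with the pair above:
 * each element that was popped but not yet pushed is written on the source
 * and re-created on the destination, passing the same sz that was given to
 * virtqueue_pop() so any trailing per-request state is allocated again.
 * req and VirtIOExampleReq are hypothetical.
 *
 *     Save side:  qemu_put_virtqueue_element(f, &req->elem);
 *     Load side:  req = qemu_get_virtqueue_element(f, sizeof(VirtIOExampleReq));
 *
 * qemu_get_virtqueue_element() also re-maps the guest buffers via
 * virtqueue_map(), so the iovecs are usable immediately after loading.
 */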
851 /* virtio device */
852 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
853 {
854     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
855     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
856 
857     if (unlikely(vdev->broken)) {
858         return;
859     }
860 
861     if (k->notify) {
862         k->notify(qbus->parent, vector);
863     }
864 }
865 
866 void virtio_update_irq(VirtIODevice *vdev)
867 {
868     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
869 }
870 
871 static int virtio_validate_features(VirtIODevice *vdev)
872 {
873     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
874 
875     if (k->validate_features) {
876         return k->validate_features(vdev);
877     } else {
878         return 0;
879     }
880 }
881 
882 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
883 {
884     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
885     trace_virtio_set_status(vdev, val);
886 
887     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
888         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
889             val & VIRTIO_CONFIG_S_FEATURES_OK) {
890             int ret = virtio_validate_features(vdev);
891 
892             if (ret) {
893                 return ret;
894             }
895         }
896     }
897     if (k->set_status) {
898         k->set_status(vdev, val);
899     }
900     vdev->status = val;
901     return 0;
902 }
903 
904 bool target_words_bigendian(void);
905 static enum virtio_device_endian virtio_default_endian(void)
906 {
907     if (target_words_bigendian()) {
908         return VIRTIO_DEVICE_ENDIAN_BIG;
909     } else {
910         return VIRTIO_DEVICE_ENDIAN_LITTLE;
911     }
912 }
913 
914 static enum virtio_device_endian virtio_current_cpu_endian(void)
915 {
916     CPUClass *cc = CPU_GET_CLASS(current_cpu);
917 
918     if (cc->virtio_is_big_endian(current_cpu)) {
919         return VIRTIO_DEVICE_ENDIAN_BIG;
920     } else {
921         return VIRTIO_DEVICE_ENDIAN_LITTLE;
922     }
923 }
924 
925 void virtio_reset(void *opaque)
926 {
927     VirtIODevice *vdev = opaque;
928     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
929     int i;
930 
931     virtio_set_status(vdev, 0);
932     if (current_cpu) {
933         /* Guest initiated reset */
934         vdev->device_endian = virtio_current_cpu_endian();
935     } else {
936         /* System reset */
937         vdev->device_endian = virtio_default_endian();
938     }
939 
940     if (k->reset) {
941         k->reset(vdev);
942     }
943 
944     vdev->broken = false;
945     vdev->guest_features = 0;
946     vdev->queue_sel = 0;
947     vdev->status = 0;
948     atomic_set(&vdev->isr, 0);
949     vdev->config_vector = VIRTIO_NO_VECTOR;
950     virtio_notify_vector(vdev, vdev->config_vector);
951 
952     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
953         vdev->vq[i].vring.desc = 0;
954         vdev->vq[i].vring.avail = 0;
955         vdev->vq[i].vring.used = 0;
956         vdev->vq[i].last_avail_idx = 0;
957         vdev->vq[i].shadow_avail_idx = 0;
958         vdev->vq[i].used_idx = 0;
959         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
960         vdev->vq[i].signalled_used = 0;
961         vdev->vq[i].signalled_used_valid = false;
962         vdev->vq[i].notification = true;
963         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
964         vdev->vq[i].inuse = 0;
965     }
966 }
967 
968 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
969 {
970     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
971     uint8_t val;
972 
973     if (addr + sizeof(val) > vdev->config_len) {
974         return (uint32_t)-1;
975     }
976 
977     k->get_config(vdev, vdev->config);
978 
979     val = ldub_p(vdev->config + addr);
980     return val;
981 }
982 
983 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
984 {
985     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
986     uint16_t val;
987 
988     if (addr + sizeof(val) > vdev->config_len) {
989         return (uint32_t)-1;
990     }
991 
992     k->get_config(vdev, vdev->config);
993 
994     val = lduw_p(vdev->config + addr);
995     return val;
996 }
997 
998 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
999 {
1000     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1001     uint32_t val;
1002 
1003     if (addr + sizeof(val) > vdev->config_len) {
1004         return (uint32_t)-1;
1005     }
1006 
1007     k->get_config(vdev, vdev->config);
1008 
1009     val = ldl_p(vdev->config + addr);
1010     return val;
1011 }
1012 
1013 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1014 {
1015     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1016     uint8_t val = data;
1017 
1018     if (addr + sizeof(val) > vdev->config_len) {
1019         return;
1020     }
1021 
1022     stb_p(vdev->config + addr, val);
1023 
1024     if (k->set_config) {
1025         k->set_config(vdev, vdev->config);
1026     }
1027 }
1028 
1029 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1030 {
1031     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1032     uint16_t val = data;
1033 
1034     if (addr + sizeof(val) > vdev->config_len) {
1035         return;
1036     }
1037 
1038     stw_p(vdev->config + addr, val);
1039 
1040     if (k->set_config) {
1041         k->set_config(vdev, vdev->config);
1042     }
1043 }
1044 
1045 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1046 {
1047     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1048     uint32_t val = data;
1049 
1050     if (addr + sizeof(val) > vdev->config_len) {
1051         return;
1052     }
1053 
1054     stl_p(vdev->config + addr, val);
1055 
1056     if (k->set_config) {
1057         k->set_config(vdev, vdev->config);
1058     }
1059 }
1060 
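/*
 * A sketch of the device side that the config accessors above rely on: the
 * reads copy bytes out of vdev->config after asking the device to refresh it
 * through k->get_config().  The struct, its QEMU_PACKED layout and the fixed
 * capacity value are illustrative only; virtio_stq_p() stores the field in
 * the endianness negotiated for the device.
 *
 *     struct virtio_example_config {
 *         uint64_t capacity;
 *     } QEMU_PACKED;
 *
 *     static void example_get_config(VirtIODevice *vdev, uint8_t *config)
 *     {
 *         struct virtio_example_config cfg;
 *
 *         virtio_stq_p(vdev, &cfg.capacity, 0x1000);
 *         memcpy(config, &cfg, sizeof(cfg));
 *     }
 */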
1061 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1062 {
1063     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1064     uint8_t val;
1065 
1066     if (addr + sizeof(val) > vdev->config_len) {
1067         return (uint32_t)-1;
1068     }
1069 
1070     k->get_config(vdev, vdev->config);
1071 
1072     val = ldub_p(vdev->config + addr);
1073     return val;
1074 }
1075 
1076 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1077 {
1078     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1079     uint16_t val;
1080 
1081     if (addr + sizeof(val) > vdev->config_len) {
1082         return (uint32_t)-1;
1083     }
1084 
1085     k->get_config(vdev, vdev->config);
1086 
1087     val = lduw_le_p(vdev->config + addr);
1088     return val;
1089 }
1090 
1091 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
1092 {
1093     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1094     uint32_t val;
1095 
1096     if (addr + sizeof(val) > vdev->config_len) {
1097         return (uint32_t)-1;
1098     }
1099 
1100     k->get_config(vdev, vdev->config);
1101 
1102     val = ldl_le_p(vdev->config + addr);
1103     return val;
1104 }
1105 
1106 void virtio_config_modern_writeb(VirtIODevice *vdev,
1107                                  uint32_t addr, uint32_t data)
1108 {
1109     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1110     uint8_t val = data;
1111 
1112     if (addr + sizeof(val) > vdev->config_len) {
1113         return;
1114     }
1115 
1116     stb_p(vdev->config + addr, val);
1117 
1118     if (k->set_config) {
1119         k->set_config(vdev, vdev->config);
1120     }
1121 }
1122 
1123 void virtio_config_modern_writew(VirtIODevice *vdev,
1124                                  uint32_t addr, uint32_t data)
1125 {
1126     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1127     uint16_t val = data;
1128 
1129     if (addr + sizeof(val) > vdev->config_len) {
1130         return;
1131     }
1132 
1133     stw_le_p(vdev->config + addr, val);
1134 
1135     if (k->set_config) {
1136         k->set_config(vdev, vdev->config);
1137     }
1138 }
1139 
1140 void virtio_config_modern_writel(VirtIODevice *vdev,
1141                                  uint32_t addr, uint32_t data)
1142 {
1143     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1144     uint32_t val = data;
1145 
1146     if (addr + sizeof(val) > vdev->config_len) {
1147         return;
1148     }
1149 
1150     stl_le_p(vdev->config + addr, val);
1151 
1152     if (k->set_config) {
1153         k->set_config(vdev, vdev->config);
1154     }
1155 }
1156 
1157 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1158 {
1159     vdev->vq[n].vring.desc = addr;
1160     virtio_queue_update_rings(vdev, n);
1161 }
1162 
1163 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1164 {
1165     return vdev->vq[n].vring.desc;
1166 }
1167 
1168 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1169                             hwaddr avail, hwaddr used)
1170 {
1171     vdev->vq[n].vring.desc = desc;
1172     vdev->vq[n].vring.avail = avail;
1173     vdev->vq[n].vring.used = used;
1174 }
1175 
1176 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1177 {
1178     /* Don't allow guest to flip queue between existent and
1179      * nonexistent states, or to set it to an invalid size.
1180      */
1181     if (!!num != !!vdev->vq[n].vring.num ||
1182         num > VIRTQUEUE_MAX_SIZE ||
1183         num < 0) {
1184         return;
1185     }
1186     vdev->vq[n].vring.num = num;
1187 }
1188 
1189 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1190 {
1191     return QLIST_FIRST(&vdev->vector_queues[vector]);
1192 }
1193 
1194 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1195 {
1196     return QLIST_NEXT(vq, node);
1197 }
1198 
1199 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1200 {
1201     return vdev->vq[n].vring.num;
1202 }
1203 
1204 int virtio_get_num_queues(VirtIODevice *vdev)
1205 {
1206     int i;
1207 
1208     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1209         if (!virtio_queue_get_num(vdev, i)) {
1210             break;
1211         }
1212     }
1213 
1214     return i;
1215 }
1216 
1217 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1218 {
1219     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1220     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1221 
1222     /* virtio-1 compliant devices cannot change the alignment */
1223     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1224         error_report("tried to modify queue alignment for virtio-1 device");
1225         return;
1226     }
1227     /* Check that the transport told us it was going to do this
1228      * (so a buggy transport will immediately assert rather than
1229      * silently failing to migrate this state)
1230      */
1231     assert(k->has_variable_vring_alignment);
1232 
1233     vdev->vq[n].vring.align = align;
1234     virtio_queue_update_rings(vdev, n);
1235 }
1236 
1237 static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1238 {
1239     if (vq->vring.desc && vq->handle_aio_output) {
1240         VirtIODevice *vdev = vq->vdev;
1241 
1242         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1243         vq->handle_aio_output(vdev, vq);
1244     }
1245 }
1246 
1247 static void virtio_queue_notify_vq(VirtQueue *vq)
1248 {
1249     if (vq->vring.desc && vq->handle_output) {
1250         VirtIODevice *vdev = vq->vdev;
1251 
1252         if (unlikely(vdev->broken)) {
1253             return;
1254         }
1255 
1256         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1257         vq->handle_output(vdev, vq);
1258     }
1259 }
1260 
1261 void virtio_queue_notify(VirtIODevice *vdev, int n)
1262 {
1263     virtio_queue_notify_vq(&vdev->vq[n]);
1264 }
1265 
1266 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1267 {
1268     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1269         VIRTIO_NO_VECTOR;
1270 }
1271 
1272 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1273 {
1274     VirtQueue *vq = &vdev->vq[n];
1275 
1276     if (n < VIRTIO_QUEUE_MAX) {
1277         if (vdev->vector_queues &&
1278             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1279             QLIST_REMOVE(vq, node);
1280         }
1281         vdev->vq[n].vector = vector;
1282         if (vdev->vector_queues &&
1283             vector != VIRTIO_NO_VECTOR) {
1284             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1285         }
1286     }
1287 }
1288 
1289 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1290                             VirtIOHandleOutput handle_output)
1291 {
1292     int i;
1293 
1294     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1295         if (vdev->vq[i].vring.num == 0)
1296             break;
1297     }
1298 
1299     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1300         abort();
1301 
1302     vdev->vq[i].vring.num = queue_size;
1303     vdev->vq[i].vring.num_default = queue_size;
1304     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1305     vdev->vq[i].handle_output = handle_output;
1306     vdev->vq[i].handle_aio_output = NULL;
1307 
1308     return &vdev->vq[i];
1309 }
1310 
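/*
 * A sketch (hypothetical device, not from this file) of the usual call site:
 * a device's realize function registers its virtqueues once and keeps the
 * returned pointers for later virtqueue_pop()/virtio_notify() use.  Here s
 * is the hypothetical device state and the handlers and queue sizes are
 * illustrative.
 *
 *     s->req_vq  = virtio_add_queue(vdev, 128, virtio_example_handle_req);
 *     s->ctrl_vq = virtio_add_queue(vdev, 64, virtio_example_handle_ctrl);
 */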
1311 void virtio_del_queue(VirtIODevice *vdev, int n)
1312 {
1313     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1314         abort();
1315     }
1316 
1317     vdev->vq[n].vring.num = 0;
1318     vdev->vq[n].vring.num_default = 0;
1319 }
1320 
1321 static void virtio_set_isr(VirtIODevice *vdev, int value)
1322 {
1323     uint8_t old = atomic_read(&vdev->isr);
1324 
1325     /* Do not write ISR if it does not change, so that its cacheline remains
1326      * shared in the common case where the guest does not read it.
1327      */
1328     if ((old & value) != value) {
1329         atomic_or(&vdev->isr, value);
1330     }
1331 }
1332 
1333 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1334 {
1335     uint16_t old, new;
1336     bool v;
1337     /* We need to expose used array entries before checking used event. */
1338     smp_mb();
1339     /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated) */
1340     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1341         !vq->inuse && virtio_queue_empty(vq)) {
1342         return true;
1343     }
1344 
1345     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1346         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1347     }
1348 
1349     v = vq->signalled_used_valid;
1350     vq->signalled_used_valid = true;
1351     old = vq->signalled_used;
1352     new = vq->signalled_used = vq->used_idx;
1353     return !v || vring_need_event(vring_get_used_event(vq), new, old);
1354 }
1355 
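/*
 * A worked example of the EVENT_IDX branch above, using vring_need_event()'s
 * definition (uint16_t)(new - event - 1) < (uint16_t)(new - old): if the
 * guest set used_event to 5 and the device moved used_idx from old = 4 to
 * new = 7, then (7 - 5 - 1) = 1 < (7 - 4) = 3, so an interrupt is raised;
 * with used_event = 10 instead, (7 - 10 - 1) wraps to 65532, which is not
 * below 3, so the notification is suppressed until used_idx passes 10.
 */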
1356 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
1357 {
1358     if (!virtio_should_notify(vdev, vq)) {
1359         return;
1360     }
1361 
1362     trace_virtio_notify_irqfd(vdev, vq);
1363 
1364     /*
1365      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
1366      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
1367      * incorrectly polling this bit during crashdump and hibernation
1368      * in MSI mode, causing a hang if this bit is never updated.
1369      * Recent releases of Windows do not really shut down, but rather
1370      * log out and hibernate to make the next startup faster.  Hence,
1371      * this manifested as a more serious hang during shutdown with
1372      *
1373      * Next driver release from 2016 fixed this problem, so working around it
1374      * is not a must, but it's easy to do so let's do it here.
1375      *
1376      * Note: it's safe to update ISR from any thread as it was switched
1377      * to an atomic operation.
1378      */
1379     virtio_set_isr(vq->vdev, 0x1);
1380     event_notifier_set(&vq->guest_notifier);
1381 }
1382 
1383 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1384 {
1385     if (!virtio_should_notify(vdev, vq)) {
1386         return;
1387     }
1388 
1389     trace_virtio_notify(vdev, vq);
1390     virtio_set_isr(vq->vdev, 0x1);
1391     virtio_notify_vector(vdev, vq->vector);
1392 }
1393 
1394 void virtio_notify_config(VirtIODevice *vdev)
1395 {
1396     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1397         return;
1398 
1399     virtio_set_isr(vdev, 0x3);
1400     vdev->generation++;
1401     virtio_notify_vector(vdev, vdev->config_vector);
1402 }
1403 
1404 static bool virtio_device_endian_needed(void *opaque)
1405 {
1406     VirtIODevice *vdev = opaque;
1407 
1408     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1409     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1410         return vdev->device_endian != virtio_default_endian();
1411     }
1412     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1413     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1414 }
1415 
1416 static bool virtio_64bit_features_needed(void *opaque)
1417 {
1418     VirtIODevice *vdev = opaque;
1419 
1420     return (vdev->host_features >> 32) != 0;
1421 }
1422 
1423 static bool virtio_virtqueue_needed(void *opaque)
1424 {
1425     VirtIODevice *vdev = opaque;
1426 
1427     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1428 }
1429 
1430 static bool virtio_ringsize_needed(void *opaque)
1431 {
1432     VirtIODevice *vdev = opaque;
1433     int i;
1434 
1435     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1436         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1437             return true;
1438         }
1439     }
1440     return false;
1441 }
1442 
1443 static bool virtio_extra_state_needed(void *opaque)
1444 {
1445     VirtIODevice *vdev = opaque;
1446     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1447     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1448 
1449     return k->has_extra_state &&
1450         k->has_extra_state(qbus->parent);
1451 }
1452 
1453 static bool virtio_broken_needed(void *opaque)
1454 {
1455     VirtIODevice *vdev = opaque;
1456 
1457     return vdev->broken;
1458 }
1459 
1460 static const VMStateDescription vmstate_virtqueue = {
1461     .name = "virtqueue_state",
1462     .version_id = 1,
1463     .minimum_version_id = 1,
1464     .fields = (VMStateField[]) {
1465         VMSTATE_UINT64(vring.avail, struct VirtQueue),
1466         VMSTATE_UINT64(vring.used, struct VirtQueue),
1467         VMSTATE_END_OF_LIST()
1468     }
1469 };
1470 
1471 static const VMStateDescription vmstate_virtio_virtqueues = {
1472     .name = "virtio/virtqueues",
1473     .version_id = 1,
1474     .minimum_version_id = 1,
1475     .needed = &virtio_virtqueue_needed,
1476     .fields = (VMStateField[]) {
1477         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1478                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1479         VMSTATE_END_OF_LIST()
1480     }
1481 };
1482 
1483 static const VMStateDescription vmstate_ringsize = {
1484     .name = "ringsize_state",
1485     .version_id = 1,
1486     .minimum_version_id = 1,
1487     .fields = (VMStateField[]) {
1488         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1489         VMSTATE_END_OF_LIST()
1490     }
1491 };
1492 
1493 static const VMStateDescription vmstate_virtio_ringsize = {
1494     .name = "virtio/ringsize",
1495     .version_id = 1,
1496     .minimum_version_id = 1,
1497     .needed = &virtio_ringsize_needed,
1498     .fields = (VMStateField[]) {
1499         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1500                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1501         VMSTATE_END_OF_LIST()
1502     }
1503 };
1504 
1505 static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1506 {
1507     VirtIODevice *vdev = pv;
1508     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1509     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1510 
1511     if (!k->load_extra_state) {
1512         return -1;
1513     } else {
1514         return k->load_extra_state(qbus->parent, f);
1515     }
1516 }
1517 
1518 static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1519 {
1520     VirtIODevice *vdev = pv;
1521     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1522     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1523 
1524     k->save_extra_state(qbus->parent, f);
1525 }
1526 
1527 static const VMStateInfo vmstate_info_extra_state = {
1528     .name = "virtqueue_extra_state",
1529     .get = get_extra_state,
1530     .put = put_extra_state,
1531 };
1532 
1533 static const VMStateDescription vmstate_virtio_extra_state = {
1534     .name = "virtio/extra_state",
1535     .version_id = 1,
1536     .minimum_version_id = 1,
1537     .needed = &virtio_extra_state_needed,
1538     .fields = (VMStateField[]) {
1539         {
1540             .name         = "extra_state",
1541             .version_id   = 0,
1542             .field_exists = NULL,
1543             .size         = 0,
1544             .info         = &vmstate_info_extra_state,
1545             .flags        = VMS_SINGLE,
1546             .offset       = 0,
1547         },
1548         VMSTATE_END_OF_LIST()
1549     }
1550 };
1551 
1552 static const VMStateDescription vmstate_virtio_device_endian = {
1553     .name = "virtio/device_endian",
1554     .version_id = 1,
1555     .minimum_version_id = 1,
1556     .needed = &virtio_device_endian_needed,
1557     .fields = (VMStateField[]) {
1558         VMSTATE_UINT8(device_endian, VirtIODevice),
1559         VMSTATE_END_OF_LIST()
1560     }
1561 };
1562 
1563 static const VMStateDescription vmstate_virtio_64bit_features = {
1564     .name = "virtio/64bit_features",
1565     .version_id = 1,
1566     .minimum_version_id = 1,
1567     .needed = &virtio_64bit_features_needed,
1568     .fields = (VMStateField[]) {
1569         VMSTATE_UINT64(guest_features, VirtIODevice),
1570         VMSTATE_END_OF_LIST()
1571     }
1572 };
1573 
1574 static const VMStateDescription vmstate_virtio_broken = {
1575     .name = "virtio/broken",
1576     .version_id = 1,
1577     .minimum_version_id = 1,
1578     .needed = &virtio_broken_needed,
1579     .fields = (VMStateField[]) {
1580         VMSTATE_BOOL(broken, VirtIODevice),
1581         VMSTATE_END_OF_LIST()
1582     }
1583 };
1584 
1585 static const VMStateDescription vmstate_virtio = {
1586     .name = "virtio",
1587     .version_id = 1,
1588     .minimum_version_id = 1,
1589     .minimum_version_id_old = 1,
1590     .fields = (VMStateField[]) {
1591         VMSTATE_END_OF_LIST()
1592     },
1593     .subsections = (const VMStateDescription*[]) {
1594         &vmstate_virtio_device_endian,
1595         &vmstate_virtio_64bit_features,
1596         &vmstate_virtio_virtqueues,
1597         &vmstate_virtio_ringsize,
1598         &vmstate_virtio_broken,
1599         &vmstate_virtio_extra_state,
1600         NULL
1601     }
1602 };
1603 
1604 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1605 {
1606     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1607     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1608     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1609     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1610     int i;
1611 
1612     if (k->save_config) {
1613         k->save_config(qbus->parent, f);
1614     }
1615 
1616     qemu_put_8s(f, &vdev->status);
1617     qemu_put_8s(f, &vdev->isr);
1618     qemu_put_be16s(f, &vdev->queue_sel);
1619     qemu_put_be32s(f, &guest_features_lo);
1620     qemu_put_be32(f, vdev->config_len);
1621     qemu_put_buffer(f, vdev->config, vdev->config_len);
1622 
1623     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1624         if (vdev->vq[i].vring.num == 0)
1625             break;
1626     }
1627 
1628     qemu_put_be32(f, i);
1629 
1630     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1631         if (vdev->vq[i].vring.num == 0)
1632             break;
1633 
1634         qemu_put_be32(f, vdev->vq[i].vring.num);
1635         if (k->has_variable_vring_alignment) {
1636             qemu_put_be32(f, vdev->vq[i].vring.align);
1637         }
1638         /* XXX virtio-1 devices */
1639         qemu_put_be64(f, vdev->vq[i].vring.desc);
1640         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1641         if (k->save_queue) {
1642             k->save_queue(qbus->parent, i, f);
1643         }
1644     }
1645 
1646     if (vdc->save != NULL) {
1647         vdc->save(vdev, f);
1648     }
1649 
1650     if (vdc->vmsd) {
1651         vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1652     }
1653 
1654     /* Subsections */
1655     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1656 }
1657 
1658 /* A wrapper for use as a VMState .put function */
1659 static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
1660 {
1661     virtio_save(VIRTIO_DEVICE(opaque), f);
1662 }
1663 
1664 /* A wrapper for use as a VMState .get function */
1665 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
1666 {
1667     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1668     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1669 
1670     return virtio_load(vdev, f, dc->vmsd->version_id);
1671 }
1672 
1673 const VMStateInfo  virtio_vmstate_info = {
1674     .name = "virtio",
1675     .get = virtio_device_get,
1676     .put = virtio_device_put,
1677 };
1678 
1679 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1680 {
1681     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1682     bool bad = (val & ~(vdev->host_features)) != 0;
1683 
1684     val &= vdev->host_features;
1685     if (k->set_features) {
1686         k->set_features(vdev, val);
1687     }
1688     vdev->guest_features = val;
1689     return bad ? -1 : 0;
1690 }
1691 
1692 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1693 {
1694     /*
1695      * The driver must not attempt to set features after feature negotiation
1696      * has finished.
1697      */
1698     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1699         return -EINVAL;
1700     }
1701     return virtio_set_features_nocheck(vdev, val);
1702 }
1703 
1704 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1705 {
1706     int i, ret;
1707     int32_t config_len;
1708     uint32_t num;
1709     uint32_t features;
1710     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1711     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1712     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1713 
1714     /*
1715      * We poison the endianness to ensure it does not get used before
1716      * subsections have been loaded.
1717      */
1718     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1719 
1720     if (k->load_config) {
1721         ret = k->load_config(qbus->parent, f);
1722         if (ret)
1723             return ret;
1724     }
1725 
1726     qemu_get_8s(f, &vdev->status);
1727     qemu_get_8s(f, &vdev->isr);
1728     qemu_get_be16s(f, &vdev->queue_sel);
1729     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1730         return -1;
1731     }
1732     qemu_get_be32s(f, &features);
1733 
1734     /*
1735      * Temporarily set guest_features low bits - needed by
1736      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1737      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1738      *
1739      * Note: devices should always test host features in future - don't create
1740      * new dependencies like this.
1741      */
1742     vdev->guest_features = features;
1743 
1744     config_len = qemu_get_be32(f);
1745 
1746     /*
1747      * There are cases where the incoming config can be bigger or smaller
1748      * than what we have; so load what we have space for, and skip
1749      * any excess that's in the stream.
1750      */
1751     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1752 
1753     while (config_len > vdev->config_len) {
1754         qemu_get_byte(f);
1755         config_len--;
1756     }
1757 
1758     num = qemu_get_be32(f);
1759 
1760     if (num > VIRTIO_QUEUE_MAX) {
1761         error_report("Invalid number of virtqueues: 0x%x", num);
1762         return -1;
1763     }
1764 
1765     for (i = 0; i < num; i++) {
1766         vdev->vq[i].vring.num = qemu_get_be32(f);
1767         if (k->has_variable_vring_alignment) {
1768             vdev->vq[i].vring.align = qemu_get_be32(f);
1769         }
1770         vdev->vq[i].vring.desc = qemu_get_be64(f);
1771         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1772         vdev->vq[i].signalled_used_valid = false;
1773         vdev->vq[i].notification = true;
1774 
1775         if (vdev->vq[i].vring.desc) {
1776             /* XXX virtio-1 devices */
1777             virtio_queue_update_rings(vdev, i);
1778         } else if (vdev->vq[i].last_avail_idx) {
1779             error_report("VQ %d address 0x0 "
1780                          "inconsistent with Host index 0x%x",
1781                          i, vdev->vq[i].last_avail_idx);
1782             return -1;
1783         }
1784         if (k->load_queue) {
1785             ret = k->load_queue(qbus->parent, i, f);
1786             if (ret)
1787                 return ret;
1788         }
1789     }
1790 
1791     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1792 
1793     if (vdc->load != NULL) {
1794         ret = vdc->load(vdev, f, version_id);
1795         if (ret) {
1796             return ret;
1797         }
1798     }
1799 
1800     if (vdc->vmsd) {
1801         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1802         if (ret) {
1803             return ret;
1804         }
1805     }
1806 
1807     /* Subsections */
1808     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1809     if (ret) {
1810         return ret;
1811     }
1812 
1813     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1814         vdev->device_endian = virtio_default_endian();
1815     }
1816 
1817     if (virtio_64bit_features_needed(vdev)) {
1818         /*
1819          * Subsection load filled vdev->guest_features.  Run them
1820          * through virtio_set_features_nocheck() to sanity-check them
1821          * against host_features.
1822          */
1823         uint64_t features64 = vdev->guest_features;
1824         if (virtio_set_features_nocheck(vdev, features64) < 0) {
1825             error_report("Features 0x%" PRIx64 " unsupported. "
1826                          "Allowed features: 0x%" PRIx64,
1827                          features64, vdev->host_features);
1828             return -1;
1829         }
1830     } else {
1831         if (virtio_set_features_nocheck(vdev, features) < 0) {
1832             error_report("Features 0x%x unsupported. "
1833                          "Allowed features: 0x%" PRIx64,
1834                          features, vdev->host_features);
1835             return -1;
1836         }
1837     }
1838 
1839     for (i = 0; i < num; i++) {
1840         if (vdev->vq[i].vring.desc) {
1841             uint16_t nheads;
1842             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1843             /* Check it isn't doing strange things with descriptor numbers. */
1844             if (nheads > vdev->vq[i].vring.num) {
1845                 error_report("VQ %d size 0x%x Guest index 0x%x "
1846                              "inconsistent with Host index 0x%x: delta 0x%x",
1847                              i, vdev->vq[i].vring.num,
1848                              vring_avail_idx(&vdev->vq[i]),
1849                              vdev->vq[i].last_avail_idx, nheads);
1850                 return -1;
1851             }
1852             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1853             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1854 
1855             /*
1856              * Some devices migrate VirtQueueElements that have been popped
1857              * from the avail ring but not yet returned to the used ring.
1858              */
1859             vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1860                                 vdev->vq[i].used_idx;
1861             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1862                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1863                              "used_idx 0x%x",
1864                              i, vdev->vq[i].vring.num,
1865                              vdev->vq[i].last_avail_idx,
1866                              vdev->vq[i].used_idx);
1867                 return -1;
1868             }
1869         }
1870     }
1871 
1872     return 0;
1873 }
1874 
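/*
 * Undo virtio_init(): unregister the VM state change handler and free
 * the config space buffer, the virtqueue array and the vector_queues
 * array.
 */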
1875 void virtio_cleanup(VirtIODevice *vdev)
1876 {
1877     qemu_del_vm_change_state_handler(vdev->vmstate);
1878     g_free(vdev->config);
1879     g_free(vdev->vq);
1880     g_free(vdev->vector_queues);
1881 }
1882 
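/*
 * VM run state hook: when the VM (re)starts and the guest has set
 * DRIVER_OK, the current status is pushed to the device before the
 * transport's vmstate_change hook runs; when the VM stops, the
 * transport hook runs first and the status is pushed afterwards.
 */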
1883 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1884 {
1885     VirtIODevice *vdev = opaque;
1886     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1887     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1888     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1889     vdev->vm_running = running;
1890 
1891     if (backend_run) {
1892         virtio_set_status(vdev, vdev->status);
1893     }
1894 
1895     if (k->vmstate_change) {
1896         k->vmstate_change(qbus->parent, backend_run);
1897     }
1898 
1899     if (!backend_run) {
1900         virtio_set_status(vdev, vdev->status);
1901     }
1902 }
1903 
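/*
 * Helper for transport proxy objects (e.g. the virtio PCI proxies):
 * initialize the embedded VirtIODevice, attach it as the proxy's
 * "virtio-backend" child and alias all of its properties onto the proxy.
 */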
1904 void virtio_instance_init_common(Object *proxy_obj, void *data,
1905                                  size_t vdev_size, const char *vdev_name)
1906 {
1907     DeviceState *vdev = data;
1908 
1909     object_initialize(vdev, vdev_size, vdev_name);
1910     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1911     object_unref(OBJECT(vdev));
1912     qdev_alias_all_properties(vdev, proxy_obj);
1913 }
1914 
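/*
 * Common initialisation shared by all virtio devices: allocate the
 * virtqueue array (and vector_queues when the transport uses vectors),
 * reset status, ISR and queue selector, register the VM state change
 * handler and pick the default device endianness.  A device realize
 * method is expected to call this before adding its queues; a purely
 * illustrative sketch (the names below are not from this file):
 *
 *     virtio_init(vdev, "virtio-foo", VIRTIO_ID_FOO, sizeof(struct foo_cfg));
 *     virtio_add_queue(vdev, 128, foo_handle_output);
 */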
1915 void virtio_init(VirtIODevice *vdev, const char *name,
1916                  uint16_t device_id, size_t config_size)
1917 {
1918     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1919     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1920     int i;
1921     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1922 
1923     if (nvectors) {
1924         vdev->vector_queues =
1925             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1926     }
1927 
1928     vdev->device_id = device_id;
1929     vdev->status = 0;
1930     atomic_set(&vdev->isr, 0);
1931     vdev->queue_sel = 0;
1932     vdev->config_vector = VIRTIO_NO_VECTOR;
1933     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1934     vdev->vm_running = runstate_is_running();
1935     vdev->broken = false;
1936     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1937         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1938         vdev->vq[i].vdev = vdev;
1939         vdev->vq[i].queue_index = i;
1940     }
1941 
1942     vdev->name = name;
1943     vdev->config_len = config_size;
1944     if (vdev->config_len) {
1945         vdev->config = g_malloc0(config_size);
1946     } else {
1947         vdev->config = NULL;
1948     }
1949     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1950                                                      vdev);
1951     vdev->device_endian = virtio_default_endian();
1952     vdev->use_guest_notifier_mask = true;
1953 }
1954 
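/*
 * Accessors for the guest-physical addresses and sizes of the three
 * parts of a split virtqueue (descriptor table, available ring, used
 * ring) as laid out by virtio_queue_update_rings().  Transports and
 * vhost use them when mapping the rings.
 */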
1955 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1956 {
1957     return vdev->vq[n].vring.desc;
1958 }
1959 
1960 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1961 {
1962     return vdev->vq[n].vring.avail;
1963 }
1964 
1965 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1966 {
1967     return vdev->vq[n].vring.used;
1968 }
1969 
1970 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1971 {
1972     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1973 }
1974 
1975 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1976 {
1977     return offsetof(VRingAvail, ring) +
1978         sizeof(uint16_t) * vdev->vq[n].vring.num;
1979 }
1980 
1981 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1982 {
1983     return offsetof(VRingUsed, ring) +
1984         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1985 }
1986 
1987 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1988 {
1989     return vdev->vq[n].last_avail_idx;
1990 }
1991 
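/*
 * Overwrite the next-to-pop index and reset the cached avail index to
 * match; used when a backend such as vhost hands the ring back to QEMU.
 */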
1992 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1993 {
1994     vdev->vq[n].last_avail_idx = idx;
1995     vdev->vq[n].shadow_avail_idx = idx;
1996 }
1997 
1998 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1999 {
2000     vdev->vq[n].signalled_used_valid = false;
2001 }
2002 
2003 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2004 {
2005     return vdev->vq + n;
2006 }
2007 
2008 uint16_t virtio_get_queue_index(VirtQueue *vq)
2009 {
2010     return vq->queue_index;
2011 }
2012 
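/*
 * Guest notifier handling for the non-irqfd case: an event on the guest
 * notifier is read in QEMU and turned into an interrupt on the queue's
 * vector.
 */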
2013 static void virtio_queue_guest_notifier_read(EventNotifier *n)
2014 {
2015     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2016     if (event_notifier_test_and_clear(n)) {
2017         virtio_notify_vector(vq->vdev, vq->vector);
2018     }
2019 }
2020 
2021 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2022                                                 bool with_irqfd)
2023 {
2024     if (assign && !with_irqfd) {
2025         event_notifier_set_handler(&vq->guest_notifier, false,
2026                                    virtio_queue_guest_notifier_read);
2027     } else {
2028         event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2029     }
2030     if (!assign) {
2031         /* Test and clear notifier before closing it,
2032          * in case poll callback didn't have time to run. */
2033         virtio_queue_guest_notifier_read(&vq->guest_notifier);
2034     }
2035 }
2036 
2037 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2038 {
2039     return &vq->guest_notifier;
2040 }
2041 
2042 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2043 {
2044     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2045     if (event_notifier_test_and_clear(n)) {
2046         virtio_queue_notify_aio_vq(vq);
2047     }
2048 }
2049 
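/*
 * Attach the queue's host notifier to an AioContext so guest kicks are
 * handled there (dataplane).  Passing a NULL handler detaches it; any
 * event that raced with the detach is processed before handle_aio_output
 * is cleared.
 */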
2050 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2051                                                 VirtIOHandleOutput handle_output)
2052 {
2053     if (handle_output) {
2054         vq->handle_aio_output = handle_output;
2055         aio_set_event_notifier(ctx, &vq->host_notifier, true,
2056                                virtio_queue_host_notifier_aio_read, NULL);
2057     } else {
2058         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2059         /* Test and clear notifier after disabling event,
2060          * in case poll callback didn't have time to run. */
2061         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2062         vq->handle_aio_output = NULL;
2063     }
2064 }
2065 
2066 void virtio_queue_host_notifier_read(EventNotifier *n)
2067 {
2068     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2069     if (event_notifier_test_and_clear(n)) {
2070         virtio_queue_notify_vq(vq);
2071     }
2072 }
2073 
2074 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2075 {
2076     return &vq->host_notifier;
2077 }
2078 
2079 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2080 {
2081     g_free(vdev->bus_name);
2082     vdev->bus_name = g_strdup(bus_name);
2083 }
2084 
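/*
 * Report a fatal, guest-visible device error and mark the device broken.
 * For VIRTIO 1.0 devices the VIRTIO_CONFIG_S_NEEDS_RESET status bit is
 * set and a config interrupt is raised so the guest can react; legacy
 * devices have no equivalent mechanism and simply stop having their
 * rings processed.
 * Illustrative call: virtio_error(vdev, "Descriptor index %u out of range", i);
 */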
2085 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2086 {
2087     va_list ap;
2088 
2089     va_start(ap, fmt);
2090     error_vreport(fmt, ap);
2091     va_end(ap);
2092 
2093     vdev->broken = true;
2094 
2095     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2096         virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2097         virtio_notify_config(vdev);
2098     }
2099 }
2100 
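/*
 * DeviceClass::realize for virtio devices: run the subclass realize hook
 * first, then notify the transport via virtio_bus_device_plugged() so it
 * can finish its side of the setup.
 */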
2101 static void virtio_device_realize(DeviceState *dev, Error **errp)
2102 {
2103     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2104     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2105     Error *err = NULL;
2106 
2107     /* Devices should either use vmsd or the load/save methods */
2108     assert(!vdc->vmsd || !vdc->load);
2109 
2110     if (vdc->realize != NULL) {
2111         vdc->realize(dev, &err);
2112         if (err != NULL) {
2113             error_propagate(errp, err);
2114             return;
2115         }
2116     }
2117 
2118     virtio_bus_device_plugged(vdev, &err);
2119     if (err != NULL) {
2120         error_propagate(errp, err);
2121         return;
2122     }
2123 }
2124 
2125 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2126 {
2127     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2128     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2129     Error *err = NULL;
2130 
2131     virtio_bus_device_unplugged(vdev);
2132 
2133     if (vdc->unrealize != NULL) {
2134         vdc->unrealize(dev, &err);
2135         if (err != NULL) {
2136             error_propagate(errp, err);
2137             return;
2138         }
2139     }
2140 
2141     g_free(vdev->bus_name);
2142     vdev->bus_name = NULL;
2143 }
2144 
2145 static Property virtio_properties[] = {
2146     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2147     DEFINE_PROP_END_OF_LIST(),
2148 };
2149 
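/*
 * Default start_ioeventfd implementation: assign a host notifier for
 * every configured queue, install virtio_queue_host_notifier_read() as
 * its handler, then kick each queue once so requests already sitting in
 * the rings are processed.  On error, roll back the notifiers assigned
 * so far.
 */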
2150 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2151 {
2152     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2153     int n, r, err;
2154 
2155     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2156         VirtQueue *vq = &vdev->vq[n];
2157         if (!virtio_queue_get_num(vdev, n)) {
2158             continue;
2159         }
2160         r = virtio_bus_set_host_notifier(qbus, n, true);
2161         if (r < 0) {
2162             err = r;
2163             goto assign_error;
2164         }
2165         event_notifier_set_handler(&vq->host_notifier, true,
2166                                    virtio_queue_host_notifier_read);
2167     }
2168 
2169     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2170         /* Kick right away to begin processing requests already in vring */
2171         VirtQueue *vq = &vdev->vq[n];
2172         if (!vq->vring.num) {
2173             continue;
2174         }
2175         event_notifier_set(&vq->host_notifier);
2176     }
2177     return 0;
2178 
2179 assign_error:
2180     while (--n >= 0) {
2181         VirtQueue *vq = &vdev->vq[n];
2182         if (!virtio_queue_get_num(vdev, n)) {
2183             continue;
2184         }
2185 
2186         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2187         r = virtio_bus_set_host_notifier(qbus, n, false);
2188         assert(r >= 0);
2189     }
2190     return err;
2191 }
2192 
2193 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2194 {
2195     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2196     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2197 
2198     return virtio_bus_start_ioeventfd(vbus);
2199 }
2200 
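/*
 * Default stop_ioeventfd implementation: remove the handler and deassign
 * the host notifier of every configured queue.
 */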
2201 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2202 {
2203     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2204     int n, r;
2205 
2206     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2207         VirtQueue *vq = &vdev->vq[n];
2208 
2209         if (!virtio_queue_get_num(vdev, n)) {
2210             continue;
2211         }
2212         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2213         r = virtio_bus_set_host_notifier(qbus, n, false);
2214         assert(r >= 0);
2215     }
2216 }
2217 
2218 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2219 {
2220     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2221     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2222 
2223     virtio_bus_stop_ioeventfd(vbus);
2224 }
2225 
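/*
 * Grab/release the ioeventfd machinery on behalf of a backend (such as
 * vhost) that takes over the queue host notifiers; the bus keeps a grab
 * count and keeps its own handlers out of the way until the last grab
 * is released.
 */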
2226 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2227 {
2228     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2229     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2230 
2231     return virtio_bus_grab_ioeventfd(vbus);
2232 }
2233 
2234 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2235 {
2236     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2237     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2238 
2239     virtio_bus_release_ioeventfd(vbus);
2240 }
2241 
2242 static void virtio_device_class_init(ObjectClass *klass, void *data)
2243 {
2244     /* Set the default value here. */
2245     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2246     DeviceClass *dc = DEVICE_CLASS(klass);
2247 
2248     dc->realize = virtio_device_realize;
2249     dc->unrealize = virtio_device_unrealize;
2250     dc->bus_type = TYPE_VIRTIO_BUS;
2251     dc->props = virtio_properties;
2252     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2253     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2254 
2255     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2256 }
2257 
2258 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2259 {
2260     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2261     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2262 
2263     return virtio_bus_ioeventfd_enabled(vbus);
2264 }
2265 
2266 static const TypeInfo virtio_device_info = {
2267     .name = TYPE_VIRTIO_DEVICE,
2268     .parent = TYPE_DEVICE,
2269     .instance_size = sizeof(VirtIODevice),
2270     .class_init = virtio_device_class_init,
2271     .abstract = true,
2272     .class_size = sizeof(VirtioDeviceClass),
2273 };
2274 
2275 static void virtio_register_types(void)
2276 {
2277     type_register_static(&virtio_device_info);
2278 }
2279 
2280 type_init(virtio_register_types)
2281