xref: /openbmc/qemu/hw/virtio/virtio.c (revision 90ce6e26)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 
16 #include "trace.h"
17 #include "exec/address-spaces.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/virtio.h"
20 #include "qemu/atomic.h"
21 #include "hw/virtio/virtio-bus.h"
22 #include "migration/migration.h"
23 #include "hw/virtio/virtio-access.h"
24 
25 /*
26  * The alignment to use between consumer and producer parts of vring.
27  * x86 pagesize again. This is the default, used by transports like PCI
28  * which don't provide a means for the guest to tell the host the alignment.
29  */
30 #define VIRTIO_PCI_VRING_ALIGN         4096
31 
32 typedef struct VRingDesc
33 {
34     uint64_t addr;
35     uint32_t len;
36     uint16_t flags;
37     uint16_t next;
38 } VRingDesc;
39 
40 typedef struct VRingAvail
41 {
42     uint16_t flags;
43     uint16_t idx;
44     uint16_t ring[];
45 } VRingAvail;
46 
47 typedef struct VRingUsedElem
48 {
49     uint32_t id;
50     uint32_t len;
51 } VRingUsedElem;
52 
53 typedef struct VRingUsed
54 {
55     uint16_t flags;
56     uint16_t idx;
57     VRingUsedElem ring[];
58 } VRingUsed;
59 
60 typedef struct VRing
61 {
62     unsigned int num;
63     unsigned int num_default;
64     unsigned int align;
65     hwaddr desc;
66     hwaddr avail;
67     hwaddr used;
68 } VRing;
69 
70 struct VirtQueue
71 {
72     VRing vring;
73 
74     /* Next head to pop */
75     uint16_t last_avail_idx;
76 
77     /* Last avail_idx read from VQ. */
78     uint16_t shadow_avail_idx;
79 
80     uint16_t used_idx;
81 
82     /* Last used index value we have signalled on */
83     uint16_t signalled_used;
84 
85     /* Is signalled_used valid? */
86     bool signalled_used_valid;
87 
88     /* Notification enabled? */
89     bool notification;
90 
91     uint16_t queue_index;
92 
93     int inuse;
94 
95     uint16_t vector;
96     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
97     VirtIODevice *vdev;
98     EventNotifier guest_notifier;
99     EventNotifier host_notifier;
100     QLIST_ENTRY(VirtQueue) node;
101 };
102 
103 /* virt queue functions */
104 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
105 {
106     VRing *vring = &vdev->vq[n].vring;
107 
108     if (!vring->desc) {
109         /* not yet setup -> nothing to do */
110         return;
111     }
112     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
113     vring->used = vring_align(vring->avail +
114                               offsetof(VRingAvail, ring[vring->num]),
115                               vring->align);
116 }
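/*
 * Worked example of the layout computed above (illustrative numbers,
 * assuming a 256-entry queue, a page-aligned desc table and the default
 * 4096-byte alignment):
 *
 *   desc:  256 * sizeof(VRingDesc) = 256 * 16 = 4096 bytes
 *   avail: starts at desc + 4096 and spans offsetof(VRingAvail, ring[256])
 *          = 2 + 2 + 2 * 256 = 516 bytes, plus 2 more bytes for the
 *          used_event word when VIRTIO_RING_F_EVENT_IDX is negotiated
 *   used:  starts at the next align boundary after the avail ring,
 *          i.e. vring_align(avail + 516, 4096) = avail + 4096 here
 */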
117 
118 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
119                             hwaddr desc_pa, int i)
120 {
121     address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
122                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
123     virtio_tswap64s(vdev, &desc->addr);
124     virtio_tswap32s(vdev, &desc->len);
125     virtio_tswap16s(vdev, &desc->flags);
126     virtio_tswap16s(vdev, &desc->next);
127 }
128 
129 static inline uint16_t vring_avail_flags(VirtQueue *vq)
130 {
131     hwaddr pa;
132     pa = vq->vring.avail + offsetof(VRingAvail, flags);
133     return virtio_lduw_phys(vq->vdev, pa);
134 }
135 
136 static inline uint16_t vring_avail_idx(VirtQueue *vq)
137 {
138     hwaddr pa;
139     pa = vq->vring.avail + offsetof(VRingAvail, idx);
140     vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
141     return vq->shadow_avail_idx;
142 }
143 
144 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
145 {
146     hwaddr pa;
147     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
148     return virtio_lduw_phys(vq->vdev, pa);
149 }
150 
151 static inline uint16_t vring_get_used_event(VirtQueue *vq)
152 {
153     return vring_avail_ring(vq, vq->vring.num);
154 }
155 
156 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
157                                     int i)
158 {
159     hwaddr pa;
160     virtio_tswap32s(vq->vdev, &uelem->id);
161     virtio_tswap32s(vq->vdev, &uelem->len);
162     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
163     address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
164                        (void *)uelem, sizeof(VRingUsedElem));
165 }
166 
167 static uint16_t vring_used_idx(VirtQueue *vq)
168 {
169     hwaddr pa;
170     pa = vq->vring.used + offsetof(VRingUsed, idx);
171     return virtio_lduw_phys(vq->vdev, pa);
172 }
173 
174 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
175 {
176     hwaddr pa;
177     pa = vq->vring.used + offsetof(VRingUsed, idx);
178     virtio_stw_phys(vq->vdev, pa, val);
179     vq->used_idx = val;
180 }
181 
182 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
183 {
184     VirtIODevice *vdev = vq->vdev;
185     hwaddr pa;
186     pa = vq->vring.used + offsetof(VRingUsed, flags);
187     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
188 }
189 
190 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
191 {
192     VirtIODevice *vdev = vq->vdev;
193     hwaddr pa;
194     pa = vq->vring.used + offsetof(VRingUsed, flags);
195     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
196 }
197 
198 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
199 {
200     hwaddr pa;
201     if (!vq->notification) {
202         return;
203     }
204     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
205     virtio_stw_phys(vq->vdev, pa, val);
206 }
207 
208 void virtio_queue_set_notification(VirtQueue *vq, int enable)
209 {
210     vq->notification = enable;
211     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
212         vring_set_avail_event(vq, vring_avail_idx(vq));
213     } else if (enable) {
214         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
215     } else {
216         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
217     }
218     if (enable) {
219         /* Expose avail event/used flags before caller checks the avail idx. */
220         smp_mb();
221     }
222 }
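/*
 * Usage sketch (not part of this file; my_handle_output() and
 * process_request() are hypothetical names): a device's handle_output
 * callback commonly disables guest->host notifications while it drains
 * the ring, then re-enables them and re-checks the queue, because the
 * guest may add buffers between the final pop and the re-enable:
 *
 *     static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         do {
 *             virtio_queue_set_notification(vq, 0);
 *             while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *                 process_request(vdev, vq, elem);
 *             }
 *             virtio_queue_set_notification(vq, 1);
 *         } while (!virtio_queue_empty(vq));
 *     }
 */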
223 
224 int virtio_queue_ready(VirtQueue *vq)
225 {
226     return vq->vring.avail != 0;
227 }
228 
229 /* Fetch avail_idx from VQ memory only when we really need to know if
230  * guest has added some buffers. */
231 int virtio_queue_empty(VirtQueue *vq)
232 {
233     if (vq->shadow_avail_idx != vq->last_avail_idx) {
234         return 0;
235     }
236 
237     return vring_avail_idx(vq) == vq->last_avail_idx;
238 }
239 
240 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
241                                unsigned int len)
242 {
243     unsigned int offset;
244     int i;
245 
246     offset = 0;
247     for (i = 0; i < elem->in_num; i++) {
248         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
249 
250         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
251                                   elem->in_sg[i].iov_len,
252                                   1, size);
253 
254         offset += size;
255     }
256 
257     for (i = 0; i < elem->out_num; i++)
258         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
259                                   elem->out_sg[i].iov_len,
260                                   0, elem->out_sg[i].iov_len);
261 }
262 
263 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
264                        unsigned int len)
265 {
266     vq->last_avail_idx--;
267     virtqueue_unmap_sg(vq, elem, len);
268 }
269 
270 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
271                     unsigned int len, unsigned int idx)
272 {
273     VRingUsedElem uelem;
274 
275     trace_virtqueue_fill(vq, elem, len, idx);
276 
277     virtqueue_unmap_sg(vq, elem, len);
278 
279     idx = (idx + vq->used_idx) % vq->vring.num;
280 
281     uelem.id = elem->index;
282     uelem.len = len;
283     vring_used_write(vq, &uelem, idx);
284 }
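/*
 * The idx argument above is an offset relative to vq->used_idx, so a
 * device can complete several elements before any of them become visible
 * to the guest.  Illustrative batched completion, where lenN is the byte
 * count written into each element's in_sg:
 *
 *     virtqueue_fill(vq, elem0, len0, 0);
 *     virtqueue_fill(vq, elem1, len1, 1);
 *     virtqueue_flush(vq, 2);
 *
 * virtqueue_push() below is the single-element shorthand: fill at offset
 * 0 followed by a flush of 1.
 */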
285 
286 void virtqueue_flush(VirtQueue *vq, unsigned int count)
287 {
288     uint16_t old, new;
289     /* Make sure buffer is written before we update index. */
290     smp_wmb();
291     trace_virtqueue_flush(vq, count);
292     old = vq->used_idx;
293     new = old + count;
294     vring_used_idx_set(vq, new);
295     vq->inuse -= count;
296     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
297         vq->signalled_used_valid = false;
298 }
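/*
 * The unlikely() test above catches 16-bit wraparound: informally, if
 * used_idx stepped over signalled_used during this one flush, the
 * event-index bookkeeping can no longer be trusted, so the flag is
 * cleared and the next vring_notify() signals unconditionally when
 * VIRTIO_RING_F_EVENT_IDX is in use.  Illustrative numbers:
 * old = 0xfff0, count = 0x20, so new = 0x0010; with signalled_used =
 * 0x0000, (int16_t)(new - signalled_used) = 16 < (uint16_t)(new - old)
 * = 32, and signalled_used_valid is cleared.
 */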
299 
300 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
301                     unsigned int len)
302 {
303     virtqueue_fill(vq, elem, len, 0);
304     virtqueue_flush(vq, 1);
305 }
306 
307 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
308 {
309     uint16_t num_heads = vring_avail_idx(vq) - idx;
310 
311     /* Check it isn't doing very strange things with descriptor numbers. */
312     if (num_heads > vq->vring.num) {
313         error_report("Guest moved used index from %u to %u",
314                      idx, vq->shadow_avail_idx);
315         exit(1);
316     }
317     /* On success, callers read a descriptor at vq->last_avail_idx.
318      * Make sure descriptor read does not bypass avail index read. */
319     if (num_heads) {
320         smp_rmb();
321     }
322 
323     return num_heads;
324 }
325 
326 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
327 {
328     unsigned int head;
329 
330     /* Grab the next descriptor number they're advertising, and increment
331      * the index we've seen. */
332     head = vring_avail_ring(vq, idx % vq->vring.num);
333 
334     /* If their number is silly, that's a fatal mistake. */
335     if (head >= vq->vring.num) {
336         error_report("Guest says index %u is available", head);
337         exit(1);
338     }
339 
340     return head;
341 }
342 
343 static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
344                                          hwaddr desc_pa, unsigned int max)
345 {
346     unsigned int next;
347 
348     /* If this descriptor says it doesn't chain, we're done. */
349     if (!(desc->flags & VRING_DESC_F_NEXT)) {
350         return max;
351     }
352 
353     /* Check they're not leading us off end of descriptors. */
354     next = desc->next;
355     /* Make sure compiler knows to grab that: we don't want it changing! */
356     smp_wmb();
357 
358     if (next >= max) {
359         error_report("Desc next is %u", next);
360         exit(1);
361     }
362 
363     vring_desc_read(vdev, desc, desc_pa, next);
364     return next;
365 }
366 
367 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
368                                unsigned int *out_bytes,
369                                unsigned max_in_bytes, unsigned max_out_bytes)
370 {
371     unsigned int idx;
372     unsigned int total_bufs, in_total, out_total;
373 
374     idx = vq->last_avail_idx;
375 
376     total_bufs = in_total = out_total = 0;
377     while (virtqueue_num_heads(vq, idx)) {
378         VirtIODevice *vdev = vq->vdev;
379         unsigned int max, num_bufs, indirect = 0;
380         VRingDesc desc;
381         hwaddr desc_pa;
382         int i;
383 
384         max = vq->vring.num;
385         num_bufs = total_bufs;
386         i = virtqueue_get_head(vq, idx++);
387         desc_pa = vq->vring.desc;
388         vring_desc_read(vdev, &desc, desc_pa, i);
389 
390         if (desc.flags & VRING_DESC_F_INDIRECT) {
391             if (desc.len % sizeof(VRingDesc)) {
392                 error_report("Invalid size for indirect buffer table");
393                 exit(1);
394             }
395 
396             /* If we've got too many, that implies a descriptor loop. */
397             if (num_bufs >= max) {
398                 error_report("Looped descriptor");
399                 exit(1);
400             }
401 
402             /* loop over the indirect descriptor table */
403             indirect = 1;
404             max = desc.len / sizeof(VRingDesc);
405             desc_pa = desc.addr;
406             num_bufs = i = 0;
407             vring_desc_read(vdev, &desc, desc_pa, i);
408         }
409 
410         do {
411             /* If we've got too many, that implies a descriptor loop. */
412             if (++num_bufs > max) {
413                 error_report("Looped descriptor");
414                 exit(1);
415             }
416 
417             if (desc.flags & VRING_DESC_F_WRITE) {
418                 in_total += desc.len;
419             } else {
420                 out_total += desc.len;
421             }
422             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
423                 goto done;
424             }
425         } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
426 
427         if (!indirect)
428             total_bufs = num_bufs;
429         else
430             total_bufs++;
431     }
432 done:
433     if (in_bytes) {
434         *in_bytes = in_total;
435     }
436     if (out_bytes) {
437         *out_bytes = out_total;
438     }
439 }
440 
441 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
442                           unsigned int out_bytes)
443 {
444     unsigned int in_total, out_total;
445 
446     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
447     return in_bytes <= in_total && out_bytes <= out_total;
448 }
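/*
 * Example with illustrative sizes: a device that must write a 1514-byte
 * frame into guest memory can test virtqueue_avail_bytes(vq, 1514, 0)
 * and postpone the work until the guest has queued enough device-writable
 * (in) buffer space; out_bytes plays the same role for driver-supplied
 * read-only buffers.
 */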
449 
450 static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
451                                unsigned int max_num_sg, bool is_write,
452                                hwaddr pa, size_t sz)
453 {
454     unsigned num_sg = *p_num_sg;
455     assert(num_sg <= max_num_sg);
456 
457     while (sz) {
458         hwaddr len = sz;
459 
460         if (num_sg == max_num_sg) {
461             error_report("virtio: too many descriptors in descriptor table");
462             exit(1);
463         }
464 
465         iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
466         iov[num_sg].iov_len = len;
467         addr[num_sg] = pa;
468 
469         sz -= len;
470         pa += len;
471         num_sg++;
472     }
473     *p_num_sg = num_sg;
474 }
475 
476 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
477                                 unsigned int *num_sg, unsigned int max_size,
478                                 int is_write)
479 {
480     unsigned int i;
481     hwaddr len;
482 
483     /* Note: this function MUST validate input, some callers
484      * are passing in num_sg values received over the network.
485      */
486     /* TODO: teach all callers that this can fail, and return failure instead
487      * of asserting here.
488      * When we do, we might be able to re-enable NDEBUG below.
489      */
490 #ifdef NDEBUG
491 #error building with NDEBUG is not supported
492 #endif
493     assert(*num_sg <= max_size);
494 
495     for (i = 0; i < *num_sg; i++) {
496         len = sg[i].iov_len;
497         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
498         if (!sg[i].iov_base) {
499             error_report("virtio: error trying to map MMIO memory");
500             exit(1);
501         }
502         if (len != sg[i].iov_len) {
503             error_report("virtio: unexpected memory split");
504             exit(1);
505         }
506     }
507 }
508 
509 void virtqueue_map(VirtQueueElement *elem)
510 {
511     virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
512                         VIRTQUEUE_MAX_SIZE, 1);
513     virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
514                         VIRTQUEUE_MAX_SIZE, 0);
515 }
516 
517 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
518 {
519     VirtQueueElement *elem;
520     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
521     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
522     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
523     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
524     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
525     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
526 
527     assert(sz >= sizeof(VirtQueueElement));
528     elem = g_malloc(out_sg_end);
529     elem->out_num = out_num;
530     elem->in_num = in_num;
531     elem->in_addr = (void *)elem + in_addr_ofs;
532     elem->out_addr = (void *)elem + out_addr_ofs;
533     elem->in_sg = (void *)elem + in_sg_ofs;
534     elem->out_sg = (void *)elem + out_sg_ofs;
535     return elem;
536 }
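/*
 * Layout sketch of the single allocation made above, for a caller that
 * passes sz = sizeof(VirtQueueElement):
 *
 *     +---------------------------+  offset 0
 *     | VirtQueueElement header   |
 *     +---------------------------+  in_addr_ofs (hwaddr-aligned)
 *     | in_addr[in_num]           |
 *     | out_addr[out_num]         |
 *     +---------------------------+  in_sg_ofs (struct iovec-aligned)
 *     | in_sg[in_num]             |
 *     | out_sg[out_num]           |
 *     +---------------------------+  out_sg_end
 *
 * Packing everything into one g_malloc() means an element, including its
 * scatter/gather arrays, is released with a single g_free().
 */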
537 
538 void *virtqueue_pop(VirtQueue *vq, size_t sz)
539 {
540     unsigned int i, head, max;
541     hwaddr desc_pa = vq->vring.desc;
542     VirtIODevice *vdev = vq->vdev;
543     VirtQueueElement *elem;
544     unsigned out_num, in_num;
545     hwaddr addr[VIRTQUEUE_MAX_SIZE];
546     struct iovec iov[VIRTQUEUE_MAX_SIZE];
547     VRingDesc desc;
548 
549     if (virtio_queue_empty(vq)) {
550         return NULL;
551     }
552     /* Needed after virtio_queue_empty(), see comment in
553      * virtqueue_num_heads(). */
554     smp_rmb();
555 
556     /* When we start there are no input or output descriptors. */
557     out_num = in_num = 0;
558 
559     max = vq->vring.num;
560 
561     i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
562     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
563         vring_set_avail_event(vq, vq->last_avail_idx);
564     }
565 
566     vring_desc_read(vdev, &desc, desc_pa, i);
567     if (desc.flags & VRING_DESC_F_INDIRECT) {
568         if (desc.len % sizeof(VRingDesc)) {
569             error_report("Invalid size for indirect buffer table");
570             exit(1);
571         }
572 
573         /* loop over the indirect descriptor table */
574         max = desc.len / sizeof(VRingDesc);
575         desc_pa = desc.addr;
576         i = 0;
577         vring_desc_read(vdev, &desc, desc_pa, i);
578     }
579 
580     /* Collect all the descriptors */
581     do {
582         if (desc.flags & VRING_DESC_F_WRITE) {
583             virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
584                                VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
585         } else {
586             if (in_num) {
587                 error_report("Incorrect order for descriptors");
588                 exit(1);
589             }
590             virtqueue_map_desc(&out_num, addr, iov,
591                                VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
592         }
593 
594         /* If we've got too many, that implies a descriptor loop. */
595         if ((in_num + out_num) > max) {
596             error_report("Looped descriptor");
597             exit(1);
598         }
599     } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
600 
601     /* Now copy what we have collected and mapped */
602     elem = virtqueue_alloc_element(sz, out_num, in_num);
603     elem->index = head;
604     for (i = 0; i < out_num; i++) {
605         elem->out_addr[i] = addr[i];
606         elem->out_sg[i] = iov[i];
607     }
608     for (i = 0; i < in_num; i++) {
609         elem->in_addr[i] = addr[out_num + i];
610         elem->in_sg[i] = iov[out_num + i];
611     }
612 
613     vq->inuse++;
614 
615     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
616     return elem;
617 }
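/*
 * The sz argument is the size of the structure to allocate; it must embed
 * a VirtQueueElement as its first member, which lets a device keep
 * per-request state next to the element.  Sketch with a hypothetical
 * request type:
 *
 *     typedef struct MyRequest {
 *         VirtQueueElement elem;      (must be the first member)
 *         uint32_t my_private_state;
 *     } MyRequest;
 *
 *     MyRequest *req = virtqueue_pop(vq, sizeof(MyRequest));
 *
 * The returned memory comes from virtqueue_alloc_element() and is freed
 * with a single g_free() once the element has been pushed back.
 */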
618 
619 /* Reading and writing a structure directly to QEMUFile is *awful*, but
620  * it is what QEMU has always done by mistake.  We can change it sooner
621  * or later by bumping the version number of the affected vm states.
622  * In the meanwhile, since the in-memory layout of VirtQueueElement
623  * has changed, we need to marshal to and from the layout that was
624  * used before the change.
625  */
626 typedef struct VirtQueueElementOld {
627     unsigned int index;
628     unsigned int out_num;
629     unsigned int in_num;
630     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
631     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
632     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
633     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
634 } VirtQueueElementOld;
635 
636 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
637 {
638     VirtQueueElement *elem;
639     VirtQueueElementOld data;
640     int i;
641 
642     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
643 
644     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
645     elem->index = data.index;
646 
647     for (i = 0; i < elem->in_num; i++) {
648         elem->in_addr[i] = data.in_addr[i];
649     }
650 
651     for (i = 0; i < elem->out_num; i++) {
652         elem->out_addr[i] = data.out_addr[i];
653     }
654 
655     for (i = 0; i < elem->in_num; i++) {
656         /* Base is overwritten by virtqueue_map.  */
657         elem->in_sg[i].iov_base = 0;
658         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
659     }
660 
661     for (i = 0; i < elem->out_num; i++) {
662         /* Base is overwritten by virtqueue_map.  */
663         elem->out_sg[i].iov_base = 0;
664         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
665     }
666 
667     virtqueue_map(elem);
668     return elem;
669 }
670 
671 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
672 {
673     VirtQueueElementOld data;
674     int i;
675 
676     memset(&data, 0, sizeof(data));
677     data.index = elem->index;
678     data.in_num = elem->in_num;
679     data.out_num = elem->out_num;
680 
681     for (i = 0; i < elem->in_num; i++) {
682         data.in_addr[i] = elem->in_addr[i];
683     }
684 
685     for (i = 0; i < elem->out_num; i++) {
686         data.out_addr[i] = elem->out_addr[i];
687     }
688 
689     for (i = 0; i < elem->in_num; i++) {
690         /* Base is overwritten by virtqueue_map when loading.  Do not
691          * save it, as it would leak the QEMU address space layout.  */
692         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
693     }
694 
695     for (i = 0; i < elem->out_num; i++) {
696         /* Do not save iov_base as above.  */
697         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
698     }
699     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
700 }
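/*
 * Migration sketch (hypothetical device code, reusing the MyRequest type
 * from the virtqueue_pop() example): a device with requests in flight
 * writes each element in its save handler and recreates it on load:
 *
 *     save:  qemu_put_virtqueue_element(f, &req->elem);
 *     load:  req = qemu_get_virtqueue_element(f, sizeof(MyRequest));
 *
 * qemu_get_virtqueue_element() ends with virtqueue_map(), so the iov_base
 * pointers are rebuilt for the destination QEMU's address space.
 */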
701 
702 /* virtio device */
703 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
704 {
705     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
706     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
707 
708     if (k->notify) {
709         k->notify(qbus->parent, vector);
710     }
711 }
712 
713 void virtio_update_irq(VirtIODevice *vdev)
714 {
715     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
716 }
717 
718 static int virtio_validate_features(VirtIODevice *vdev)
719 {
720     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
721 
722     if (k->validate_features) {
723         return k->validate_features(vdev);
724     } else {
725         return 0;
726     }
727 }
728 
729 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
730 {
731     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
732     trace_virtio_set_status(vdev, val);
733 
734     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
735         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
736             val & VIRTIO_CONFIG_S_FEATURES_OK) {
737             int ret = virtio_validate_features(vdev);
738 
739             if (ret) {
740                 return ret;
741             }
742         }
743     }
744     if (k->set_status) {
745         k->set_status(vdev, val);
746     }
747     vdev->status = val;
748     return 0;
749 }
750 
751 bool target_words_bigendian(void);
752 static enum virtio_device_endian virtio_default_endian(void)
753 {
754     if (target_words_bigendian()) {
755         return VIRTIO_DEVICE_ENDIAN_BIG;
756     } else {
757         return VIRTIO_DEVICE_ENDIAN_LITTLE;
758     }
759 }
760 
761 static enum virtio_device_endian virtio_current_cpu_endian(void)
762 {
763     CPUClass *cc = CPU_GET_CLASS(current_cpu);
764 
765     if (cc->virtio_is_big_endian(current_cpu)) {
766         return VIRTIO_DEVICE_ENDIAN_BIG;
767     } else {
768         return VIRTIO_DEVICE_ENDIAN_LITTLE;
769     }
770 }
771 
772 void virtio_reset(void *opaque)
773 {
774     VirtIODevice *vdev = opaque;
775     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
776     int i;
777 
778     virtio_set_status(vdev, 0);
779     if (current_cpu) {
780         /* Guest initiated reset */
781         vdev->device_endian = virtio_current_cpu_endian();
782     } else {
783         /* System reset */
784         vdev->device_endian = virtio_default_endian();
785     }
786 
787     if (k->reset) {
788         k->reset(vdev);
789     }
790 
791     vdev->guest_features = 0;
792     vdev->queue_sel = 0;
793     vdev->status = 0;
794     vdev->isr = 0;
795     vdev->config_vector = VIRTIO_NO_VECTOR;
796     virtio_notify_vector(vdev, vdev->config_vector);
797 
798     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
799         vdev->vq[i].vring.desc = 0;
800         vdev->vq[i].vring.avail = 0;
801         vdev->vq[i].vring.used = 0;
802         vdev->vq[i].last_avail_idx = 0;
803         vdev->vq[i].shadow_avail_idx = 0;
804         vdev->vq[i].used_idx = 0;
805         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
806         vdev->vq[i].signalled_used = 0;
807         vdev->vq[i].signalled_used_valid = false;
808         vdev->vq[i].notification = true;
809         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
810     }
811 }
812 
813 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
814 {
815     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
816     uint8_t val;
817 
818     if (addr + sizeof(val) > vdev->config_len) {
819         return (uint32_t)-1;
820     }
821 
822     k->get_config(vdev, vdev->config);
823 
824     val = ldub_p(vdev->config + addr);
825     return val;
826 }
827 
828 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
829 {
830     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
831     uint16_t val;
832 
833     if (addr + sizeof(val) > vdev->config_len) {
834         return (uint32_t)-1;
835     }
836 
837     k->get_config(vdev, vdev->config);
838 
839     val = lduw_p(vdev->config + addr);
840     return val;
841 }
842 
843 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
844 {
845     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
846     uint32_t val;
847 
848     if (addr + sizeof(val) > vdev->config_len) {
849         return (uint32_t)-1;
850     }
851 
852     k->get_config(vdev, vdev->config);
853 
854     val = ldl_p(vdev->config + addr);
855     return val;
856 }
857 
858 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
859 {
860     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
861     uint8_t val = data;
862 
863     if (addr + sizeof(val) > vdev->config_len) {
864         return;
865     }
866 
867     stb_p(vdev->config + addr, val);
868 
869     if (k->set_config) {
870         k->set_config(vdev, vdev->config);
871     }
872 }
873 
874 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
875 {
876     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
877     uint16_t val = data;
878 
879     if (addr + sizeof(val) > vdev->config_len) {
880         return;
881     }
882 
883     stw_p(vdev->config + addr, val);
884 
885     if (k->set_config) {
886         k->set_config(vdev, vdev->config);
887     }
888 }
889 
890 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
891 {
892     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
893     uint32_t val = data;
894 
895     if (addr + sizeof(val) > vdev->config_len) {
896         return;
897     }
898 
899     stl_p(vdev->config + addr, val);
900 
901     if (k->set_config) {
902         k->set_config(vdev, vdev->config);
903     }
904 }
905 
906 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
907 {
908     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
909     uint8_t val;
910 
911     if (addr + sizeof(val) > vdev->config_len) {
912         return (uint32_t)-1;
913     }
914 
915     k->get_config(vdev, vdev->config);
916 
917     val = ldub_p(vdev->config + addr);
918     return val;
919 }
920 
921 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
922 {
923     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
924     uint16_t val;
925 
926     if (addr + sizeof(val) > vdev->config_len) {
927         return (uint32_t)-1;
928     }
929 
930     k->get_config(vdev, vdev->config);
931 
932     val = lduw_le_p(vdev->config + addr);
933     return val;
934 }
935 
936 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
937 {
938     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
939     uint32_t val;
940 
941     if (addr + sizeof(val) > vdev->config_len) {
942         return (uint32_t)-1;
943     }
944 
945     k->get_config(vdev, vdev->config);
946 
947     val = ldl_le_p(vdev->config + addr);
948     return val;
949 }
950 
951 void virtio_config_modern_writeb(VirtIODevice *vdev,
952                                  uint32_t addr, uint32_t data)
953 {
954     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
955     uint8_t val = data;
956 
957     if (addr + sizeof(val) > vdev->config_len) {
958         return;
959     }
960 
961     stb_p(vdev->config + addr, val);
962 
963     if (k->set_config) {
964         k->set_config(vdev, vdev->config);
965     }
966 }
967 
968 void virtio_config_modern_writew(VirtIODevice *vdev,
969                                  uint32_t addr, uint32_t data)
970 {
971     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
972     uint16_t val = data;
973 
974     if (addr + sizeof(val) > vdev->config_len) {
975         return;
976     }
977 
978     stw_le_p(vdev->config + addr, val);
979 
980     if (k->set_config) {
981         k->set_config(vdev, vdev->config);
982     }
983 }
984 
985 void virtio_config_modern_writel(VirtIODevice *vdev,
986                                  uint32_t addr, uint32_t data)
987 {
988     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
989     uint32_t val = data;
990 
991     if (addr + sizeof(val) > vdev->config_len) {
992         return;
993     }
994 
995     stl_le_p(vdev->config + addr, val);
996 
997     if (k->set_config) {
998         k->set_config(vdev, vdev->config);
999     }
1000 }
1001 
1002 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1003 {
1004     vdev->vq[n].vring.desc = addr;
1005     virtio_queue_update_rings(vdev, n);
1006 }
1007 
1008 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1009 {
1010     return vdev->vq[n].vring.desc;
1011 }
1012 
1013 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1014                             hwaddr avail, hwaddr used)
1015 {
1016     vdev->vq[n].vring.desc = desc;
1017     vdev->vq[n].vring.avail = avail;
1018     vdev->vq[n].vring.used = used;
1019 }
1020 
1021 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1022 {
1023     /* Don't allow guest to flip queue between existent and
1024      * nonexistent states, or to set it to an invalid size.
1025      */
1026     if (!!num != !!vdev->vq[n].vring.num ||
1027         num > VIRTQUEUE_MAX_SIZE ||
1028         num < 0) {
1029         return;
1030     }
1031     vdev->vq[n].vring.num = num;
1032 }
1033 
1034 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1035 {
1036     return QLIST_FIRST(&vdev->vector_queues[vector]);
1037 }
1038 
1039 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1040 {
1041     return QLIST_NEXT(vq, node);
1042 }
1043 
1044 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1045 {
1046     return vdev->vq[n].vring.num;
1047 }
1048 
1049 int virtio_get_num_queues(VirtIODevice *vdev)
1050 {
1051     int i;
1052 
1053     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1054         if (!virtio_queue_get_num(vdev, i)) {
1055             break;
1056         }
1057     }
1058 
1059     return i;
1060 }
1061 
1062 int virtio_queue_get_id(VirtQueue *vq)
1063 {
1064     VirtIODevice *vdev = vq->vdev;
1065     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_QUEUE_MAX]);
1066     return vq - &vdev->vq[0];
1067 }
1068 
1069 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1070 {
1071     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1072     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1073 
1074     /* virtio-1 compliant devices cannot change the alignment */
1075     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1076         error_report("tried to modify queue alignment for virtio-1 device");
1077         return;
1078     }
1079     /* Check that the transport told us it was going to do this
1080      * (so a buggy transport will immediately assert rather than
1081      * silently failing to migrate this state)
1082      */
1083     assert(k->has_variable_vring_alignment);
1084 
1085     vdev->vq[n].vring.align = align;
1086     virtio_queue_update_rings(vdev, n);
1087 }
1088 
1089 void virtio_queue_notify_vq(VirtQueue *vq)
1090 {
1091     if (vq->vring.desc && vq->handle_output) {
1092         VirtIODevice *vdev = vq->vdev;
1093 
1094         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1095         vq->handle_output(vdev, vq);
1096     }
1097 }
1098 
1099 void virtio_queue_notify(VirtIODevice *vdev, int n)
1100 {
1101     virtio_queue_notify_vq(&vdev->vq[n]);
1102 }
1103 
1104 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1105 {
1106     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1107         VIRTIO_NO_VECTOR;
1108 }
1109 
1110 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1111 {
1112     VirtQueue *vq = &vdev->vq[n];
1113 
1114     if (n < VIRTIO_QUEUE_MAX) {
1115         if (vdev->vector_queues &&
1116             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1117             QLIST_REMOVE(vq, node);
1118         }
1119         vdev->vq[n].vector = vector;
1120         if (vdev->vector_queues &&
1121             vector != VIRTIO_NO_VECTOR) {
1122             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1123         }
1124     }
1125 }
1126 
1127 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1128                             void (*handle_output)(VirtIODevice *, VirtQueue *))
1129 {
1130     int i;
1131 
1132     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1133         if (vdev->vq[i].vring.num == 0)
1134             break;
1135     }
1136 
1137     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1138         abort();
1139 
1140     vdev->vq[i].vring.num = queue_size;
1141     vdev->vq[i].vring.num_default = queue_size;
1142     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1143     vdev->vq[i].handle_output = handle_output;
1144 
1145     return &vdev->vq[i];
1146 }
1147 
1148 void virtio_del_queue(VirtIODevice *vdev, int n)
1149 {
1150     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1151         abort();
1152     }
1153 
1154     vdev->vq[n].vring.num = 0;
1155     vdev->vq[n].vring.num_default = 0;
1156 }
1157 
1158 void virtio_irq(VirtQueue *vq)
1159 {
1160     trace_virtio_irq(vq);
1161     vq->vdev->isr |= 0x01;
1162     virtio_notify_vector(vq->vdev, vq->vector);
1163 }
1164 
1165 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
1166 {
1167     uint16_t old, new;
1168     bool v;
1169     /* We need to expose used array entries before checking used event. */
1170     smp_mb();
1171     /* Always notify when queue is empty (if the feature was negotiated) */
1172     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1173         !vq->inuse && virtio_queue_empty(vq)) {
1174         return true;
1175     }
1176 
1177     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1178         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1179     }
1180 
1181     v = vq->signalled_used_valid;
1182     vq->signalled_used_valid = true;
1183     old = vq->signalled_used;
1184     new = vq->signalled_used = vq->used_idx;
1185     return !v || vring_need_event(vring_get_used_event(vq), new, old);
1186 }
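/*
 * vring_need_event(event_idx, new, old), defined with the standard ring
 * helpers, returns true exactly when event_idx lies in the window
 * [old, new) in 16-bit modular arithmetic, i.e. when this batch of used
 * entries stepped over the index the guest asked to be woken at.
 * Illustrative values: old = 10, new = 12, used_event = 10 -> signal
 * (10 is inside [old, new)); used_event = 12 -> no signal yet.
 */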
1187 
1188 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1189 {
1190     if (!vring_notify(vdev, vq)) {
1191         return;
1192     }
1193 
1194     trace_virtio_notify(vdev, vq);
1195     vdev->isr |= 0x01;
1196     virtio_notify_vector(vdev, vq->vector);
1197 }
1198 
1199 void virtio_notify_config(VirtIODevice *vdev)
1200 {
1201     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1202         return;
1203 
1204     vdev->isr |= 0x03;
1205     vdev->generation++;
1206     virtio_notify_vector(vdev, vdev->config_vector);
1207 }
1208 
1209 static bool virtio_device_endian_needed(void *opaque)
1210 {
1211     VirtIODevice *vdev = opaque;
1212 
1213     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1214     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1215         return vdev->device_endian != virtio_default_endian();
1216     }
1217     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1218     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1219 }
1220 
1221 static bool virtio_64bit_features_needed(void *opaque)
1222 {
1223     VirtIODevice *vdev = opaque;
1224 
1225     return (vdev->host_features >> 32) != 0;
1226 }
1227 
1228 static bool virtio_virtqueue_needed(void *opaque)
1229 {
1230     VirtIODevice *vdev = opaque;
1231 
1232     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1233 }
1234 
1235 static bool virtio_ringsize_needed(void *opaque)
1236 {
1237     VirtIODevice *vdev = opaque;
1238     int i;
1239 
1240     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1241         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1242             return true;
1243         }
1244     }
1245     return false;
1246 }
1247 
1248 static bool virtio_extra_state_needed(void *opaque)
1249 {
1250     VirtIODevice *vdev = opaque;
1251     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1252     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1253 
1254     return k->has_extra_state &&
1255         k->has_extra_state(qbus->parent);
1256 }
1257 
1258 static const VMStateDescription vmstate_virtqueue = {
1259     .name = "virtqueue_state",
1260     .version_id = 1,
1261     .minimum_version_id = 1,
1262     .fields = (VMStateField[]) {
1263         VMSTATE_UINT64(vring.avail, struct VirtQueue),
1264         VMSTATE_UINT64(vring.used, struct VirtQueue),
1265         VMSTATE_END_OF_LIST()
1266     }
1267 };
1268 
1269 static const VMStateDescription vmstate_virtio_virtqueues = {
1270     .name = "virtio/virtqueues",
1271     .version_id = 1,
1272     .minimum_version_id = 1,
1273     .needed = &virtio_virtqueue_needed,
1274     .fields = (VMStateField[]) {
1275         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1276                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1277         VMSTATE_END_OF_LIST()
1278     }
1279 };
1280 
1281 static const VMStateDescription vmstate_ringsize = {
1282     .name = "ringsize_state",
1283     .version_id = 1,
1284     .minimum_version_id = 1,
1285     .fields = (VMStateField[]) {
1286         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1287         VMSTATE_END_OF_LIST()
1288     }
1289 };
1290 
1291 static const VMStateDescription vmstate_virtio_ringsize = {
1292     .name = "virtio/ringsize",
1293     .version_id = 1,
1294     .minimum_version_id = 1,
1295     .needed = &virtio_ringsize_needed,
1296     .fields = (VMStateField[]) {
1297         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1298                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1299         VMSTATE_END_OF_LIST()
1300     }
1301 };
1302 
1303 static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1304 {
1305     VirtIODevice *vdev = pv;
1306     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1307     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1308 
1309     if (!k->load_extra_state) {
1310         return -1;
1311     } else {
1312         return k->load_extra_state(qbus->parent, f);
1313     }
1314 }
1315 
1316 static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1317 {
1318     VirtIODevice *vdev = pv;
1319     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1320     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1321 
1322     k->save_extra_state(qbus->parent, f);
1323 }
1324 
1325 static const VMStateInfo vmstate_info_extra_state = {
1326     .name = "virtqueue_extra_state",
1327     .get = get_extra_state,
1328     .put = put_extra_state,
1329 };
1330 
1331 static const VMStateDescription vmstate_virtio_extra_state = {
1332     .name = "virtio/extra_state",
1333     .version_id = 1,
1334     .minimum_version_id = 1,
1335     .needed = &virtio_extra_state_needed,
1336     .fields = (VMStateField[]) {
1337         {
1338             .name         = "extra_state",
1339             .version_id   = 0,
1340             .field_exists = NULL,
1341             .size         = 0,
1342             .info         = &vmstate_info_extra_state,
1343             .flags        = VMS_SINGLE,
1344             .offset       = 0,
1345         },
1346         VMSTATE_END_OF_LIST()
1347     }
1348 };
1349 
1350 static const VMStateDescription vmstate_virtio_device_endian = {
1351     .name = "virtio/device_endian",
1352     .version_id = 1,
1353     .minimum_version_id = 1,
1354     .needed = &virtio_device_endian_needed,
1355     .fields = (VMStateField[]) {
1356         VMSTATE_UINT8(device_endian, VirtIODevice),
1357         VMSTATE_END_OF_LIST()
1358     }
1359 };
1360 
1361 static const VMStateDescription vmstate_virtio_64bit_features = {
1362     .name = "virtio/64bit_features",
1363     .version_id = 1,
1364     .minimum_version_id = 1,
1365     .needed = &virtio_64bit_features_needed,
1366     .fields = (VMStateField[]) {
1367         VMSTATE_UINT64(guest_features, VirtIODevice),
1368         VMSTATE_END_OF_LIST()
1369     }
1370 };
1371 
1372 static const VMStateDescription vmstate_virtio = {
1373     .name = "virtio",
1374     .version_id = 1,
1375     .minimum_version_id = 1,
1376     .minimum_version_id_old = 1,
1377     .fields = (VMStateField[]) {
1378         VMSTATE_END_OF_LIST()
1379     },
1380     .subsections = (const VMStateDescription*[]) {
1381         &vmstate_virtio_device_endian,
1382         &vmstate_virtio_64bit_features,
1383         &vmstate_virtio_virtqueues,
1384         &vmstate_virtio_ringsize,
1385         &vmstate_virtio_extra_state,
1386         NULL
1387     }
1388 };
1389 
1390 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1391 {
1392     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1393     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1394     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1395     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1396     int i;
1397 
1398     if (k->save_config) {
1399         k->save_config(qbus->parent, f);
1400     }
1401 
1402     qemu_put_8s(f, &vdev->status);
1403     qemu_put_8s(f, &vdev->isr);
1404     qemu_put_be16s(f, &vdev->queue_sel);
1405     qemu_put_be32s(f, &guest_features_lo);
1406     qemu_put_be32(f, vdev->config_len);
1407     qemu_put_buffer(f, vdev->config, vdev->config_len);
1408 
1409     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1410         if (vdev->vq[i].vring.num == 0)
1411             break;
1412     }
1413 
1414     qemu_put_be32(f, i);
1415 
1416     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1417         if (vdev->vq[i].vring.num == 0)
1418             break;
1419 
1420         qemu_put_be32(f, vdev->vq[i].vring.num);
1421         if (k->has_variable_vring_alignment) {
1422             qemu_put_be32(f, vdev->vq[i].vring.align);
1423         }
1424         /* XXX virtio-1 devices */
1425         qemu_put_be64(f, vdev->vq[i].vring.desc);
1426         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1427         if (k->save_queue) {
1428             k->save_queue(qbus->parent, i, f);
1429         }
1430     }
1431 
1432     if (vdc->save != NULL) {
1433         vdc->save(vdev, f);
1434     }
1435 
1436     /* Subsections */
1437     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1438 }
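/*
 * Stream layout produced above, with fixed-width fields big-endian:
 * transport config (save_config), status and isr (1 byte each),
 * queue_sel, the low 32 feature bits, config_len and the config blob,
 * the number of active queues, then per queue: vring.num, vring.align
 * (only for transports with variable vring alignment), vring.desc,
 * last_avail_idx and any transport queue state (save_queue).  The device
 * payload (vdc->save) and the vmstate subsections follow; the high
 * feature bits and the virtio-1 avail/used addresses travel in the
 * subsections of vmstate_virtio above.
 */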
1439 
1440 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1441 {
1442     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1443     bool bad = (val & ~(vdev->host_features)) != 0;
1444 
1445     val &= vdev->host_features;
1446     if (k->set_features) {
1447         k->set_features(vdev, val);
1448     }
1449     vdev->guest_features = val;
1450     return bad ? -1 : 0;
1451 }
1452 
1453 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1454 {
1455     /*
1456      * The driver must not attempt to set features after feature negotiation
1457      * has finished.
1458      */
1459     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1460         return -EINVAL;
1461     }
1462     return virtio_set_features_nocheck(vdev, val);
1463 }
1464 
1465 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1466 {
1467     int i, ret;
1468     int32_t config_len;
1469     uint32_t num;
1470     uint32_t features;
1471     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1472     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1473     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1474 
1475     /*
1476      * We poison the endianness to ensure it does not get used before
1477      * subsections have been loaded.
1478      */
1479     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1480 
1481     if (k->load_config) {
1482         ret = k->load_config(qbus->parent, f);
1483         if (ret)
1484             return ret;
1485     }
1486 
1487     qemu_get_8s(f, &vdev->status);
1488     qemu_get_8s(f, &vdev->isr);
1489     qemu_get_be16s(f, &vdev->queue_sel);
1490     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1491         return -1;
1492     }
1493     qemu_get_be32s(f, &features);
1494 
1495     config_len = qemu_get_be32(f);
1496 
1497     /*
1498      * There are cases where the incoming config can be bigger or smaller
1499      * than what we have; so load what we have space for, and skip
1500      * any excess that's in the stream.
1501      */
1502     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1503 
1504     while (config_len > vdev->config_len) {
1505         qemu_get_byte(f);
1506         config_len--;
1507     }
1508 
1509     num = qemu_get_be32(f);
1510 
1511     if (num > VIRTIO_QUEUE_MAX) {
1512         error_report("Invalid number of virtqueues: 0x%x", num);
1513         return -1;
1514     }
1515 
1516     for (i = 0; i < num; i++) {
1517         vdev->vq[i].vring.num = qemu_get_be32(f);
1518         if (k->has_variable_vring_alignment) {
1519             vdev->vq[i].vring.align = qemu_get_be32(f);
1520         }
1521         vdev->vq[i].vring.desc = qemu_get_be64(f);
1522         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1523         vdev->vq[i].signalled_used_valid = false;
1524         vdev->vq[i].notification = true;
1525 
1526         if (vdev->vq[i].vring.desc) {
1527             /* XXX virtio-1 devices */
1528             virtio_queue_update_rings(vdev, i);
1529         } else if (vdev->vq[i].last_avail_idx) {
1530             error_report("VQ %d address 0x0 "
1531                          "inconsistent with Host index 0x%x",
1532                          i, vdev->vq[i].last_avail_idx);
1533             return -1;
1534         }
1535         if (k->load_queue) {
1536             ret = k->load_queue(qbus->parent, i, f);
1537             if (ret)
1538                 return ret;
1539         }
1540     }
1541 
1542     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1543 
1544     if (vdc->load != NULL) {
1545         ret = vdc->load(vdev, f, version_id);
1546         if (ret) {
1547             return ret;
1548         }
1549     }
1550 
1551     /* Subsections */
1552     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1553     if (ret) {
1554         return ret;
1555     }
1556 
1557     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1558         vdev->device_endian = virtio_default_endian();
1559     }
1560 
1561     if (virtio_64bit_features_needed(vdev)) {
1562         /*
1563          * Subsection load filled vdev->guest_features.  Run them
1564          * through virtio_set_features to sanity-check them against
1565          * host_features.
1566          */
1567         uint64_t features64 = vdev->guest_features;
1568         if (virtio_set_features_nocheck(vdev, features64) < 0) {
1569             error_report("Features 0x%" PRIx64 " unsupported. "
1570                          "Allowed features: 0x%" PRIx64,
1571                          features64, vdev->host_features);
1572             return -1;
1573         }
1574     } else {
1575         if (virtio_set_features_nocheck(vdev, features) < 0) {
1576             error_report("Features 0x%x unsupported. "
1577                          "Allowed features: 0x%" PRIx64,
1578                          features, vdev->host_features);
1579             return -1;
1580         }
1581     }
1582 
1583     for (i = 0; i < num; i++) {
1584         if (vdev->vq[i].vring.desc) {
1585             uint16_t nheads;
1586             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1587             /* Check it isn't doing strange things with descriptor numbers. */
1588             if (nheads > vdev->vq[i].vring.num) {
1589                 error_report("VQ %d size 0x%x Guest index 0x%x "
1590                              "inconsistent with Host index 0x%x: delta 0x%x",
1591                              i, vdev->vq[i].vring.num,
1592                              vring_avail_idx(&vdev->vq[i]),
1593                              vdev->vq[i].last_avail_idx, nheads);
1594                 return -1;
1595             }
1596             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1597             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1598         }
1599     }
1600 
1601     return 0;
1602 }
1603 
1604 void virtio_cleanup(VirtIODevice *vdev)
1605 {
1606     qemu_del_vm_change_state_handler(vdev->vmstate);
1607     g_free(vdev->config);
1608     g_free(vdev->vq);
1609     g_free(vdev->vector_queues);
1610 }
1611 
1612 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1613 {
1614     VirtIODevice *vdev = opaque;
1615     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1616     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1617     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1618     vdev->vm_running = running;
1619 
1620     if (backend_run) {
1621         virtio_set_status(vdev, vdev->status);
1622     }
1623 
1624     if (k->vmstate_change) {
1625         k->vmstate_change(qbus->parent, backend_run);
1626     }
1627 
1628     if (!backend_run) {
1629         virtio_set_status(vdev, vdev->status);
1630     }
1631 }
1632 
1633 void virtio_instance_init_common(Object *proxy_obj, void *data,
1634                                  size_t vdev_size, const char *vdev_name)
1635 {
1636     DeviceState *vdev = data;
1637 
1638     object_initialize(vdev, vdev_size, vdev_name);
1639     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1640     object_unref(OBJECT(vdev));
1641     qdev_alias_all_properties(vdev, proxy_obj);
1642 }
1643 
1644 void virtio_init(VirtIODevice *vdev, const char *name,
1645                  uint16_t device_id, size_t config_size)
1646 {
1647     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1648     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1649     int i;
1650     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1651 
1652     if (nvectors) {
1653         vdev->vector_queues =
1654             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1655     }
1656 
1657     vdev->device_id = device_id;
1658     vdev->status = 0;
1659     vdev->isr = 0;
1660     vdev->queue_sel = 0;
1661     vdev->config_vector = VIRTIO_NO_VECTOR;
1662     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1663     vdev->vm_running = runstate_is_running();
1664     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1665         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1666         vdev->vq[i].vdev = vdev;
1667         vdev->vq[i].queue_index = i;
1668     }
1669 
1670     vdev->name = name;
1671     vdev->config_len = config_size;
1672     if (vdev->config_len) {
1673         vdev->config = g_malloc0(config_size);
1674     } else {
1675         vdev->config = NULL;
1676     }
1677     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1678                                                      vdev);
1679     vdev->device_endian = virtio_default_endian();
1680     vdev->use_guest_notifier_mask = true;
1681 }
1682 
1683 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1684 {
1685     return vdev->vq[n].vring.desc;
1686 }
1687 
1688 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1689 {
1690     return vdev->vq[n].vring.avail;
1691 }
1692 
1693 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1694 {
1695     return vdev->vq[n].vring.used;
1696 }
1697 
1698 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1699 {
1700     return vdev->vq[n].vring.desc;
1701 }
1702 
1703 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1704 {
1705     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1706 }
1707 
1708 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1709 {
1710     return offsetof(VRingAvail, ring) +
1711         sizeof(uint16_t) * vdev->vq[n].vring.num;
1712 }
1713 
1714 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1715 {
1716     return offsetof(VRingUsed, ring) +
1717         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1718 }
1719 
1720 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1721 {
1722     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1723         virtio_queue_get_used_size(vdev, n);
1724 }
1725 
1726 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1727 {
1728     return vdev->vq[n].last_avail_idx;
1729 }
1730 
1731 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1732 {
1733     vdev->vq[n].last_avail_idx = idx;
1734     vdev->vq[n].shadow_avail_idx = idx;
1735 }
1736 
1737 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1738 {
1739     vdev->vq[n].signalled_used_valid = false;
1740 }
1741 
1742 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1743 {
1744     return vdev->vq + n;
1745 }
1746 
1747 uint16_t virtio_get_queue_index(VirtQueue *vq)
1748 {
1749     return vq->queue_index;
1750 }
1751 
1752 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1753 {
1754     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1755     if (event_notifier_test_and_clear(n)) {
1756         virtio_irq(vq);
1757     }
1758 }
1759 
1760 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1761                                                 bool with_irqfd)
1762 {
1763     if (assign && !with_irqfd) {
1764         event_notifier_set_handler(&vq->guest_notifier,
1765                                    virtio_queue_guest_notifier_read);
1766     } else {
1767         event_notifier_set_handler(&vq->guest_notifier, NULL);
1768     }
1769     if (!assign) {
1770         /* Test and clear notifier before closing it,
1771          * in case poll callback didn't have time to run. */
1772         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1773     }
1774 }
1775 
1776 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1777 {
1778     return &vq->guest_notifier;
1779 }
1780 
1781 static void virtio_queue_host_notifier_read(EventNotifier *n)
1782 {
1783     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1784     if (event_notifier_test_and_clear(n)) {
1785         virtio_queue_notify_vq(vq);
1786     }
1787 }
1788 
1789 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1790                                                bool set_handler)
1791 {
1792     if (assign && set_handler) {
1793         event_notifier_set_handler(&vq->host_notifier,
1794                                    virtio_queue_host_notifier_read);
1795     } else {
1796         event_notifier_set_handler(&vq->host_notifier, NULL);
1797     }
1798     if (!assign) {
1799         /* Test and clear notifier after disabling event,
1800          * in case poll callback didn't have time to run. */
1801         virtio_queue_host_notifier_read(&vq->host_notifier);
1802     }
1803 }
1804 
1805 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1806 {
1807     return &vq->host_notifier;
1808 }
1809 
1810 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1811 {
1812     g_free(vdev->bus_name);
1813     vdev->bus_name = g_strdup(bus_name);
1814 }
1815 
1816 static void virtio_device_realize(DeviceState *dev, Error **errp)
1817 {
1818     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1819     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1820     Error *err = NULL;
1821 
1822     if (vdc->realize != NULL) {
1823         vdc->realize(dev, &err);
1824         if (err != NULL) {
1825             error_propagate(errp, err);
1826             return;
1827         }
1828     }
1829 
1830     virtio_bus_device_plugged(vdev, &err);
1831     if (err != NULL) {
1832         error_propagate(errp, err);
1833         return;
1834     }
1835 }
1836 
1837 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1838 {
1839     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1840     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1841     Error *err = NULL;
1842 
1843     virtio_bus_device_unplugged(vdev);
1844 
1845     if (vdc->unrealize != NULL) {
1846         vdc->unrealize(dev, &err);
1847         if (err != NULL) {
1848             error_propagate(errp, err);
1849             return;
1850         }
1851     }
1852 
1853     g_free(vdev->bus_name);
1854     vdev->bus_name = NULL;
1855 }
1856 
1857 static Property virtio_properties[] = {
1858     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
1859     DEFINE_PROP_END_OF_LIST(),
1860 };
1861 
1862 static void virtio_device_class_init(ObjectClass *klass, void *data)
1863 {
1864     /* Set the default value here. */
1865     DeviceClass *dc = DEVICE_CLASS(klass);
1866 
1867     dc->realize = virtio_device_realize;
1868     dc->unrealize = virtio_device_unrealize;
1869     dc->bus_type = TYPE_VIRTIO_BUS;
1870     dc->props = virtio_properties;
1871 }
1872 
1873 static const TypeInfo virtio_device_info = {
1874     .name = TYPE_VIRTIO_DEVICE,
1875     .parent = TYPE_DEVICE,
1876     .instance_size = sizeof(VirtIODevice),
1877     .class_init = virtio_device_class_init,
1878     .abstract = true,
1879     .class_size = sizeof(VirtioDeviceClass),
1880 };
1881 
1882 static void virtio_register_types(void)
1883 {
1884     type_register_static(&virtio_device_info);
1885 }
1886 
1887 type_init(virtio_register_types)
1888