xref: /openbmc/qemu/hw/virtio/virtio.c (revision 786a4ea8)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "exec/address-spaces.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/virtio.h"
20 #include "qemu/atomic.h"
21 #include "hw/virtio/virtio-bus.h"
22 #include "migration/migration.h"
23 #include "hw/virtio/virtio-access.h"
24 
/*
 * The alignment to use between the consumer and producer parts of the vring.
 * The value is the x86 page size. This is the default, used by transports
 * like PCI which don't provide a means for the guest to tell the host the
 * alignment.
 */
30 #define VIRTIO_PCI_VRING_ALIGN         4096
31 
/*
 * Layout of one descriptor table entry.  The fields are never accessed
 * through a host pointer in this file; the vring_desc_*() helpers use
 * sizeof(VRingDesc) and offsetof() to compute guest physical addresses.
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* buffer address; collected into in_addr/out_addr */
    uint32_t len;    /* buffer length; becomes iov_len of the mapped sg */
    uint16_t flags;  /* tested against the VRING_DESC_F_* bits */
    uint16_t next;   /* index of the next descriptor in the chain */
} VRingDesc;
39 
/*
 * Layout of the available ring header followed by its entries.  Only used
 * for offsetof() computations; the trailing array is a C99 flexible array
 * member, so sizeof(VRingAvail) covers just the two header fields.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
46 
/* One entry of the used ring: which element was completed and its length. */
typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

/*
 * Layout of the used ring header followed by its entries.  Only used for
 * offsetof() computations; the trailing array is a C99 flexible array
 * member, so sizeof(VRingUsed) covers just the header (plus padding).
 */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;
59 
/* Host-side bookkeeping for one ring: its size and where its parts live. */
typedef struct VRing
{
    unsigned int num;    /* number of entries; 0 means the queue is unused */
    unsigned int align;  /* alignment handed to vring_align() for 'used' */
    hwaddr desc;         /* address of the descriptor table */
    hwaddr avail;        /* address of the available ring */
    hwaddr used;         /* address of the used ring */
} VRing;
68 
/* Per-queue state kept by the device model. */
struct VirtQueue
{
    VRing vring;
    /* Base address of the ring; virtqueue_init() derives vring.* from it */
    hwaddr pa;
    /* Next available-ring position virtqueue_pop() will consume */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used currently holds a usable value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Elements popped but not yet flushed (++ in pop, -= count in flush) */
    int inuse;

    uint16_t vector;
    /* Callback invoked by virtio_queue_notify_vq() */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
93 
94 /* virt queue functions */
95 static void virtqueue_init(VirtQueue *vq)
96 {
97     hwaddr pa = vq->pa;
98 
99     vq->vring.desc = pa;
100     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
101     vq->vring.used = vring_align(vq->vring.avail +
102                                  offsetof(VRingAvail, ring[vq->vring.num]),
103                                  vq->vring.align);
104 }
105 
106 static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
107                                        int i)
108 {
109     hwaddr pa;
110     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
111     return virtio_ldq_phys(vdev, pa);
112 }
113 
114 static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
115 {
116     hwaddr pa;
117     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
118     return virtio_ldl_phys(vdev, pa);
119 }
120 
121 static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
122                                         int i)
123 {
124     hwaddr pa;
125     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
126     return virtio_lduw_phys(vdev, pa);
127 }
128 
129 static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
130                                        int i)
131 {
132     hwaddr pa;
133     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
134     return virtio_lduw_phys(vdev, pa);
135 }
136 
137 static inline uint16_t vring_avail_flags(VirtQueue *vq)
138 {
139     hwaddr pa;
140     pa = vq->vring.avail + offsetof(VRingAvail, flags);
141     return virtio_lduw_phys(vq->vdev, pa);
142 }
143 
144 static inline uint16_t vring_avail_idx(VirtQueue *vq)
145 {
146     hwaddr pa;
147     pa = vq->vring.avail + offsetof(VRingAvail, idx);
148     return virtio_lduw_phys(vq->vdev, pa);
149 }
150 
151 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
152 {
153     hwaddr pa;
154     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
155     return virtio_lduw_phys(vq->vdev, pa);
156 }
157 
158 static inline uint16_t vring_get_used_event(VirtQueue *vq)
159 {
160     return vring_avail_ring(vq, vq->vring.num);
161 }
162 
163 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
164 {
165     hwaddr pa;
166     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
167     virtio_stl_phys(vq->vdev, pa, val);
168 }
169 
170 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
171 {
172     hwaddr pa;
173     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
174     virtio_stl_phys(vq->vdev, pa, val);
175 }
176 
177 static uint16_t vring_used_idx(VirtQueue *vq)
178 {
179     hwaddr pa;
180     pa = vq->vring.used + offsetof(VRingUsed, idx);
181     return virtio_lduw_phys(vq->vdev, pa);
182 }
183 
184 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
185 {
186     hwaddr pa;
187     pa = vq->vring.used + offsetof(VRingUsed, idx);
188     virtio_stw_phys(vq->vdev, pa, val);
189 }
190 
191 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
192 {
193     VirtIODevice *vdev = vq->vdev;
194     hwaddr pa;
195     pa = vq->vring.used + offsetof(VRingUsed, flags);
196     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
197 }
198 
199 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
200 {
201     VirtIODevice *vdev = vq->vdev;
202     hwaddr pa;
203     pa = vq->vring.used + offsetof(VRingUsed, flags);
204     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
205 }
206 
207 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
208 {
209     hwaddr pa;
210     if (!vq->notification) {
211         return;
212     }
213     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
214     virtio_stw_phys(vq->vdev, pa, val);
215 }
216 
217 void virtio_queue_set_notification(VirtQueue *vq, int enable)
218 {
219     vq->notification = enable;
220     if (virtio_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
221         vring_set_avail_event(vq, vring_avail_idx(vq));
222     } else if (enable) {
223         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
224     } else {
225         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
226     }
227     if (enable) {
228         /* Expose avail event/used flags before caller checks the avail idx. */
229         smp_mb();
230     }
231 }
232 
233 int virtio_queue_ready(VirtQueue *vq)
234 {
235     return vq->vring.avail != 0;
236 }
237 
238 int virtio_queue_empty(VirtQueue *vq)
239 {
240     return vring_avail_idx(vq) == vq->last_avail_idx;
241 }
242 
243 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
244                     unsigned int len, unsigned int idx)
245 {
246     unsigned int offset;
247     int i;
248 
249     trace_virtqueue_fill(vq, elem, len, idx);
250 
251     offset = 0;
252     for (i = 0; i < elem->in_num; i++) {
253         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
254 
255         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
256                                   elem->in_sg[i].iov_len,
257                                   1, size);
258 
259         offset += size;
260     }
261 
262     for (i = 0; i < elem->out_num; i++)
263         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
264                                   elem->out_sg[i].iov_len,
265                                   0, elem->out_sg[i].iov_len);
266 
267     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
268 
269     /* Get a pointer to the next entry in the used ring. */
270     vring_used_ring_id(vq, idx, elem->index);
271     vring_used_ring_len(vq, idx, len);
272 }
273 
274 void virtqueue_flush(VirtQueue *vq, unsigned int count)
275 {
276     uint16_t old, new;
277     /* Make sure buffer is written before we update index. */
278     smp_wmb();
279     trace_virtqueue_flush(vq, count);
280     old = vring_used_idx(vq);
281     new = old + count;
282     vring_used_idx_set(vq, new);
283     vq->inuse -= count;
284     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
285         vq->signalled_used_valid = false;
286 }
287 
288 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
289                     unsigned int len)
290 {
291     virtqueue_fill(vq, elem, len, 0);
292     virtqueue_flush(vq, 1);
293 }
294 
295 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
296 {
297     uint16_t num_heads = vring_avail_idx(vq) - idx;
298 
299     /* Check it isn't doing very strange things with descriptor numbers. */
300     if (num_heads > vq->vring.num) {
301         error_report("Guest moved used index from %u to %u",
302                      idx, vring_avail_idx(vq));
303         exit(1);
304     }
305     /* On success, callers read a descriptor at vq->last_avail_idx.
306      * Make sure descriptor read does not bypass avail index read. */
307     if (num_heads) {
308         smp_rmb();
309     }
310 
311     return num_heads;
312 }
313 
314 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
315 {
316     unsigned int head;
317 
318     /* Grab the next descriptor number they're advertising, and increment
319      * the index we've seen. */
320     head = vring_avail_ring(vq, idx % vq->vring.num);
321 
322     /* If their number is silly, that's a fatal mistake. */
323     if (head >= vq->vring.num) {
324         error_report("Guest says index %u is available", head);
325         exit(1);
326     }
327 
328     return head;
329 }
330 
331 static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
332                                     unsigned int i, unsigned int max)
333 {
334     unsigned int next;
335 
336     /* If this descriptor says it doesn't chain, we're done. */
337     if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) {
338         return max;
339     }
340 
341     /* Check they're not leading us off end of descriptors. */
342     next = vring_desc_next(vdev, desc_pa, i);
343     /* Make sure compiler knows to grab that: we don't want it changing! */
344     smp_wmb();
345 
346     if (next >= max) {
347         error_report("Desc next is %u", next);
348         exit(1);
349     }
350 
351     return next;
352 }
353 
/*
 * Sum the buffer lengths of the descriptor chains that are currently
 * available on @vq, without consuming them (vq->last_avail_idx is only
 * read, never advanced).
 *
 * @in_bytes:      if non-NULL, receives the total length of descriptors
 *                 flagged VRING_DESC_F_WRITE.
 * @out_bytes:     if non-NULL, receives the total length of the others.
 * @max_in_bytes,
 * @max_out_bytes: the walk stops early once both totals have reached
 *                 these thresholds.
 *
 * Malformed rings (bad indirect table size, descriptor loops) terminate
 * the process via exit(1).
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    /* One iteration per available chain head. */
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        /* Direct descriptors seen so far count towards the loop check. */
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(vdev, desc_pa, i);
            /* The indirect table is walked from its first entry. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(vdev, desc_pa, i);
            } else {
                out_total += vring_desc_len(vdev, desc_pa, i);
            }
            /* Stop as soon as both requested thresholds are met. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

        /* An indirect chain counts as a single main-table descriptor. */
        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
424 
425 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
426                           unsigned int out_bytes)
427 {
428     unsigned int in_total, out_total;
429 
430     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
431     return in_bytes <= in_total && out_bytes <= out_total;
432 }
433 
434 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
435     size_t num_sg, int is_write)
436 {
437     unsigned int i;
438     hwaddr len;
439 
440     if (num_sg > VIRTQUEUE_MAX_SIZE) {
441         error_report("virtio: map attempt out of bounds: %zd > %d",
442                      num_sg, VIRTQUEUE_MAX_SIZE);
443         exit(1);
444     }
445 
446     for (i = 0; i < num_sg; i++) {
447         len = sg[i].iov_len;
448         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
449         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
450             error_report("virtio: error trying to map MMIO memory");
451             exit(1);
452         }
453     }
454 }
455 
456 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
457 {
458     unsigned int i, head, max;
459     hwaddr desc_pa = vq->vring.desc;
460     VirtIODevice *vdev = vq->vdev;
461 
462     if (!virtqueue_num_heads(vq, vq->last_avail_idx))
463         return 0;
464 
465     /* When we start there are none of either input nor output. */
466     elem->out_num = elem->in_num = 0;
467 
468     max = vq->vring.num;
469 
470     i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
471     if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
472         vring_set_avail_event(vq, vq->last_avail_idx);
473     }
474 
475     if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
476         if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
477             error_report("Invalid size for indirect buffer table");
478             exit(1);
479         }
480 
481         /* loop over the indirect descriptor table */
482         max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
483         desc_pa = vring_desc_addr(vdev, desc_pa, i);
484         i = 0;
485     }
486 
487     /* Collect all the descriptors */
488     do {
489         struct iovec *sg;
490 
491         if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
492             if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
493                 error_report("Too many write descriptors in indirect table");
494                 exit(1);
495             }
496             elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
497             sg = &elem->in_sg[elem->in_num++];
498         } else {
499             if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
500                 error_report("Too many read descriptors in indirect table");
501                 exit(1);
502             }
503             elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
504             sg = &elem->out_sg[elem->out_num++];
505         }
506 
507         sg->iov_len = vring_desc_len(vdev, desc_pa, i);
508 
509         /* If we've got too many, that implies a descriptor loop. */
510         if ((elem->in_num + elem->out_num) > max) {
511             error_report("Looped descriptor");
512             exit(1);
513         }
514     } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
515 
516     /* Now map what we have collected */
517     virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
518     virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
519 
520     elem->index = head;
521 
522     vq->inuse++;
523 
524     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
525     return elem->in_num + elem->out_num;
526 }
527 
528 /* virtio device */
529 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
530 {
531     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
532     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
533 
534     if (k->notify) {
535         k->notify(qbus->parent, vector);
536     }
537 }
538 
539 void virtio_update_irq(VirtIODevice *vdev)
540 {
541     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
542 }
543 
544 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
545 {
546     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
547     trace_virtio_set_status(vdev, val);
548 
549     if (k->set_status) {
550         k->set_status(vdev, val);
551     }
552     vdev->status = val;
553 }
554 
555 bool target_words_bigendian(void);
556 static enum virtio_device_endian virtio_default_endian(void)
557 {
558     if (target_words_bigendian()) {
559         return VIRTIO_DEVICE_ENDIAN_BIG;
560     } else {
561         return VIRTIO_DEVICE_ENDIAN_LITTLE;
562     }
563 }
564 
565 static enum virtio_device_endian virtio_current_cpu_endian(void)
566 {
567     CPUClass *cc = CPU_GET_CLASS(current_cpu);
568 
569     if (cc->virtio_is_big_endian(current_cpu)) {
570         return VIRTIO_DEVICE_ENDIAN_BIG;
571     } else {
572         return VIRTIO_DEVICE_ENDIAN_LITTLE;
573     }
574 }
575 
576 void virtio_reset(void *opaque)
577 {
578     VirtIODevice *vdev = opaque;
579     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
580     int i;
581 
582     virtio_set_status(vdev, 0);
583     if (current_cpu) {
584         /* Guest initiated reset */
585         vdev->device_endian = virtio_current_cpu_endian();
586     } else {
587         /* System reset */
588         vdev->device_endian = virtio_default_endian();
589     }
590 
591     if (k->reset) {
592         k->reset(vdev);
593     }
594 
595     vdev->guest_features = 0;
596     vdev->queue_sel = 0;
597     vdev->status = 0;
598     vdev->isr = 0;
599     vdev->config_vector = VIRTIO_NO_VECTOR;
600     virtio_notify_vector(vdev, vdev->config_vector);
601 
602     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
603         vdev->vq[i].vring.desc = 0;
604         vdev->vq[i].vring.avail = 0;
605         vdev->vq[i].vring.used = 0;
606         vdev->vq[i].last_avail_idx = 0;
607         vdev->vq[i].pa = 0;
608         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
609         vdev->vq[i].signalled_used = 0;
610         vdev->vq[i].signalled_used_valid = false;
611         vdev->vq[i].notification = true;
612     }
613 }
614 
615 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
616 {
617     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618     uint8_t val;
619 
620     if (addr + sizeof(val) > vdev->config_len) {
621         return (uint32_t)-1;
622     }
623 
624     k->get_config(vdev, vdev->config);
625 
626     val = ldub_p(vdev->config + addr);
627     return val;
628 }
629 
630 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
631 {
632     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
633     uint16_t val;
634 
635     if (addr + sizeof(val) > vdev->config_len) {
636         return (uint32_t)-1;
637     }
638 
639     k->get_config(vdev, vdev->config);
640 
641     val = lduw_p(vdev->config + addr);
642     return val;
643 }
644 
645 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
646 {
647     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
648     uint32_t val;
649 
650     if (addr + sizeof(val) > vdev->config_len) {
651         return (uint32_t)-1;
652     }
653 
654     k->get_config(vdev, vdev->config);
655 
656     val = ldl_p(vdev->config + addr);
657     return val;
658 }
659 
660 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
661 {
662     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
663     uint8_t val = data;
664 
665     if (addr + sizeof(val) > vdev->config_len) {
666         return;
667     }
668 
669     stb_p(vdev->config + addr, val);
670 
671     if (k->set_config) {
672         k->set_config(vdev, vdev->config);
673     }
674 }
675 
676 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
677 {
678     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
679     uint16_t val = data;
680 
681     if (addr + sizeof(val) > vdev->config_len) {
682         return;
683     }
684 
685     stw_p(vdev->config + addr, val);
686 
687     if (k->set_config) {
688         k->set_config(vdev, vdev->config);
689     }
690 }
691 
692 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
693 {
694     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
695     uint32_t val = data;
696 
697     if (addr + sizeof(val) > vdev->config_len) {
698         return;
699     }
700 
701     stl_p(vdev->config + addr, val);
702 
703     if (k->set_config) {
704         k->set_config(vdev, vdev->config);
705     }
706 }
707 
708 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
709 {
710     vdev->vq[n].pa = addr;
711     virtqueue_init(&vdev->vq[n]);
712 }
713 
714 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
715 {
716     return vdev->vq[n].pa;
717 }
718 
719 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
720 {
721     /* Don't allow guest to flip queue between existent and
722      * nonexistent states, or to set it to an invalid size.
723      */
724     if (!!num != !!vdev->vq[n].vring.num ||
725         num > VIRTQUEUE_MAX_SIZE ||
726         num < 0) {
727         return;
728     }
729     vdev->vq[n].vring.num = num;
730     virtqueue_init(&vdev->vq[n]);
731 }
732 
733 int virtio_queue_get_num(VirtIODevice *vdev, int n)
734 {
735     return vdev->vq[n].vring.num;
736 }
737 
738 int virtio_queue_get_id(VirtQueue *vq)
739 {
740     VirtIODevice *vdev = vq->vdev;
741     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
742     return vq - &vdev->vq[0];
743 }
744 
745 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
746 {
747     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
748     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
749 
750     /* Check that the transport told us it was going to do this
751      * (so a buggy transport will immediately assert rather than
752      * silently failing to migrate this state)
753      */
754     assert(k->has_variable_vring_alignment);
755 
756     vdev->vq[n].vring.align = align;
757     virtqueue_init(&vdev->vq[n]);
758 }
759 
760 void virtio_queue_notify_vq(VirtQueue *vq)
761 {
762     if (vq->vring.desc && vq->handle_output) {
763         VirtIODevice *vdev = vq->vdev;
764 
765         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
766         vq->handle_output(vdev, vq);
767     }
768 }
769 
770 void virtio_queue_notify(VirtIODevice *vdev, int n)
771 {
772     virtio_queue_notify_vq(&vdev->vq[n]);
773 }
774 
775 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
776 {
777     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
778         VIRTIO_NO_VECTOR;
779 }
780 
781 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
782 {
783     if (n < VIRTIO_PCI_QUEUE_MAX)
784         vdev->vq[n].vector = vector;
785 }
786 
787 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
788                             void (*handle_output)(VirtIODevice *, VirtQueue *))
789 {
790     int i;
791 
792     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
793         if (vdev->vq[i].vring.num == 0)
794             break;
795     }
796 
797     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
798         abort();
799 
800     vdev->vq[i].vring.num = queue_size;
801     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
802     vdev->vq[i].handle_output = handle_output;
803 
804     return &vdev->vq[i];
805 }
806 
807 void virtio_del_queue(VirtIODevice *vdev, int n)
808 {
809     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
810         abort();
811     }
812 
813     vdev->vq[n].vring.num = 0;
814 }
815 
816 void virtio_irq(VirtQueue *vq)
817 {
818     trace_virtio_irq(vq);
819     vq->vdev->isr |= 0x01;
820     virtio_notify_vector(vq->vdev, vq->vector);
821 }
822 
823 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
824 {
825     uint16_t old, new;
826     bool v;
827     /* We need to expose used array entries before checking used event. */
828     smp_mb();
829     /* Always notify when queue is empty (when feature acknowledge) */
830     if (virtio_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
831         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
832         return true;
833     }
834 
835     if (!virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
836         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
837     }
838 
839     v = vq->signalled_used_valid;
840     vq->signalled_used_valid = true;
841     old = vq->signalled_used;
842     new = vq->signalled_used = vring_used_idx(vq);
843     return !v || vring_need_event(vring_get_used_event(vq), new, old);
844 }
845 
846 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
847 {
848     if (!vring_notify(vdev, vq)) {
849         return;
850     }
851 
852     trace_virtio_notify(vdev, vq);
853     vdev->isr |= 0x01;
854     virtio_notify_vector(vdev, vq->vector);
855 }
856 
857 void virtio_notify_config(VirtIODevice *vdev)
858 {
859     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
860         return;
861 
862     vdev->isr |= 0x03;
863     virtio_notify_vector(vdev, vdev->config_vector);
864 }
865 
866 static bool virtio_device_endian_needed(void *opaque)
867 {
868     VirtIODevice *vdev = opaque;
869 
870     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
871     return vdev->device_endian != virtio_default_endian();
872 }
873 
/*
 * Subsection carrying VirtIODevice.device_endian as a single byte.  It is
 * attached to vmstate_virtio and guarded by virtio_device_endian_needed().
 */
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
883 
/*
 * Container description with no fields of its own; it exists to carry the
 * subsections that virtio_save()/virtio_load() process after the
 * hand-written part of the stream.
 */
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_virtio_device_endian,
            .needed = &virtio_device_endian_needed
        },
        { 0 }
    }
};
900 
901 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
902 {
903     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
904     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
905     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
906     int i;
907 
908     if (k->save_config) {
909         k->save_config(qbus->parent, f);
910     }
911 
912     qemu_put_8s(f, &vdev->status);
913     qemu_put_8s(f, &vdev->isr);
914     qemu_put_be16s(f, &vdev->queue_sel);
915     qemu_put_be32s(f, &vdev->guest_features);
916     qemu_put_be32(f, vdev->config_len);
917     qemu_put_buffer(f, vdev->config, vdev->config_len);
918 
919     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
920         if (vdev->vq[i].vring.num == 0)
921             break;
922     }
923 
924     qemu_put_be32(f, i);
925 
926     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
927         if (vdev->vq[i].vring.num == 0)
928             break;
929 
930         qemu_put_be32(f, vdev->vq[i].vring.num);
931         if (k->has_variable_vring_alignment) {
932             qemu_put_be32(f, vdev->vq[i].vring.align);
933         }
934         qemu_put_be64(f, vdev->vq[i].pa);
935         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
936         if (k->save_queue) {
937             k->save_queue(qbus->parent, i, f);
938         }
939     }
940 
941     if (vdc->save != NULL) {
942         vdc->save(vdev, f);
943     }
944 
945     /* Subsections */
946     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
947 }
948 
949 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
950 {
951     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
952     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
953     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
954     uint32_t supported_features = vbusk->get_features(qbus->parent);
955     bool bad = (val & ~supported_features) != 0;
956 
957     val &= supported_features;
958     if (k->set_features) {
959         k->set_features(vdev, val);
960     }
961     vdev->guest_features = val;
962     return bad ? -1 : 0;
963 }
964 
965 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
966 {
967     int i, ret;
968     int32_t config_len;
969     uint32_t num;
970     uint32_t features;
971     uint32_t supported_features;
972     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
973     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
974     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
975 
976     /*
977      * We poison the endianness to ensure it does not get used before
978      * subsections have been loaded.
979      */
980     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
981 
982     if (k->load_config) {
983         ret = k->load_config(qbus->parent, f);
984         if (ret)
985             return ret;
986     }
987 
988     qemu_get_8s(f, &vdev->status);
989     qemu_get_8s(f, &vdev->isr);
990     qemu_get_be16s(f, &vdev->queue_sel);
991     if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
992         return -1;
993     }
994     qemu_get_be32s(f, &features);
995 
996     if (virtio_set_features(vdev, features) < 0) {
997         supported_features = k->get_features(qbus->parent);
998         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
999                      features, supported_features);
1000         return -1;
1001     }
1002     config_len = qemu_get_be32(f);
1003 
1004     /*
1005      * There are cases where the incoming config can be bigger or smaller
1006      * than what we have; so load what we have space for, and skip
1007      * any excess that's in the stream.
1008      */
1009     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1010 
1011     while (config_len > vdev->config_len) {
1012         qemu_get_byte(f);
1013         config_len--;
1014     }
1015 
1016     num = qemu_get_be32(f);
1017 
1018     if (num > VIRTIO_PCI_QUEUE_MAX) {
1019         error_report("Invalid number of PCI queues: 0x%x", num);
1020         return -1;
1021     }
1022 
1023     for (i = 0; i < num; i++) {
1024         vdev->vq[i].vring.num = qemu_get_be32(f);
1025         if (k->has_variable_vring_alignment) {
1026             vdev->vq[i].vring.align = qemu_get_be32(f);
1027         }
1028         vdev->vq[i].pa = qemu_get_be64(f);
1029         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1030         vdev->vq[i].signalled_used_valid = false;
1031         vdev->vq[i].notification = true;
1032 
1033         if (vdev->vq[i].pa) {
1034             virtqueue_init(&vdev->vq[i]);
1035         } else if (vdev->vq[i].last_avail_idx) {
1036             error_report("VQ %d address 0x0 "
1037                          "inconsistent with Host index 0x%x",
1038                          i, vdev->vq[i].last_avail_idx);
1039                 return -1;
1040 	}
1041         if (k->load_queue) {
1042             ret = k->load_queue(qbus->parent, i, f);
1043             if (ret)
1044                 return ret;
1045         }
1046     }
1047 
1048     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1049 
1050     if (vdc->load != NULL) {
1051         ret = vdc->load(vdev, f, version_id);
1052         if (ret) {
1053             return ret;
1054         }
1055     }
1056 
1057     /* Subsections */
1058     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1059     if (ret) {
1060         return ret;
1061     }
1062 
1063     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1064         vdev->device_endian = virtio_default_endian();
1065     }
1066 
1067     for (i = 0; i < num; i++) {
1068         if (vdev->vq[i].pa) {
1069             uint16_t nheads;
1070             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1071             /* Check it isn't doing strange things with descriptor numbers. */
1072             if (nheads > vdev->vq[i].vring.num) {
1073                 error_report("VQ %d size 0x%x Guest index 0x%x "
1074                              "inconsistent with Host index 0x%x: delta 0x%x",
1075                              i, vdev->vq[i].vring.num,
1076                              vring_avail_idx(&vdev->vq[i]),
1077                              vdev->vq[i].last_avail_idx, nheads);
1078                 return -1;
1079             }
1080         }
1081     }
1082 
1083     return 0;
1084 }
1085 
1086 void virtio_cleanup(VirtIODevice *vdev)
1087 {
1088     qemu_del_vm_change_state_handler(vdev->vmstate);
1089     g_free(vdev->config);
1090     g_free(vdev->vq);
1091 }
1092 
1093 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1094 {
1095     VirtIODevice *vdev = opaque;
1096     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1097     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1098     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1099     vdev->vm_running = running;
1100 
1101     if (backend_run) {
1102         virtio_set_status(vdev, vdev->status);
1103     }
1104 
1105     if (k->vmstate_change) {
1106         k->vmstate_change(qbus->parent, backend_run);
1107     }
1108 
1109     if (!backend_run) {
1110         virtio_set_status(vdev, vdev->status);
1111     }
1112 }
1113 
1114 void virtio_instance_init_common(Object *proxy_obj, void *data,
1115                                  size_t vdev_size, const char *vdev_name)
1116 {
1117     DeviceState *vdev = data;
1118 
1119     object_initialize(vdev, vdev_size, vdev_name);
1120     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1121     object_unref(OBJECT(vdev));
1122     qdev_alias_all_properties(vdev, proxy_obj);
1123 }
1124 
1125 void virtio_init(VirtIODevice *vdev, const char *name,
1126                  uint16_t device_id, size_t config_size)
1127 {
1128     int i;
1129     vdev->device_id = device_id;
1130     vdev->status = 0;
1131     vdev->isr = 0;
1132     vdev->queue_sel = 0;
1133     vdev->config_vector = VIRTIO_NO_VECTOR;
1134     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
1135     vdev->vm_running = runstate_is_running();
1136     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
1137         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1138         vdev->vq[i].vdev = vdev;
1139         vdev->vq[i].queue_index = i;
1140     }
1141 
1142     vdev->name = name;
1143     vdev->config_len = config_size;
1144     if (vdev->config_len) {
1145         vdev->config = g_malloc0(config_size);
1146     } else {
1147         vdev->config = NULL;
1148     }
1149     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1150                                                      vdev);
1151     vdev->device_endian = virtio_default_endian();
1152 }
1153 
1154 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1155 {
1156     return vdev->vq[n].vring.desc;
1157 }
1158 
1159 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1160 {
1161     return vdev->vq[n].vring.avail;
1162 }
1163 
1164 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1165 {
1166     return vdev->vq[n].vring.used;
1167 }
1168 
1169 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1170 {
1171     return vdev->vq[n].vring.desc;
1172 }
1173 
1174 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1175 {
1176     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1177 }
1178 
1179 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1180 {
1181     return offsetof(VRingAvail, ring) +
1182         sizeof(uint64_t) * vdev->vq[n].vring.num;
1183 }
1184 
1185 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1186 {
1187     return offsetof(VRingUsed, ring) +
1188         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1189 }
1190 
1191 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1192 {
1193     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1194 	    virtio_queue_get_used_size(vdev, n);
1195 }
1196 
1197 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1198 {
1199     return vdev->vq[n].last_avail_idx;
1200 }
1201 
1202 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1203 {
1204     vdev->vq[n].last_avail_idx = idx;
1205 }
1206 
1207 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1208 {
1209     vdev->vq[n].signalled_used_valid = false;
1210 }
1211 
1212 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1213 {
1214     return vdev->vq + n;
1215 }
1216 
1217 uint16_t virtio_get_queue_index(VirtQueue *vq)
1218 {
1219     return vq->queue_index;
1220 }
1221 
1222 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1223 {
1224     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1225     if (event_notifier_test_and_clear(n)) {
1226         virtio_irq(vq);
1227     }
1228 }
1229 
1230 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1231                                                 bool with_irqfd)
1232 {
1233     if (assign && !with_irqfd) {
1234         event_notifier_set_handler(&vq->guest_notifier,
1235                                    virtio_queue_guest_notifier_read);
1236     } else {
1237         event_notifier_set_handler(&vq->guest_notifier, NULL);
1238     }
1239     if (!assign) {
1240         /* Test and clear notifier before closing it,
1241          * in case poll callback didn't have time to run. */
1242         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1243     }
1244 }
1245 
1246 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1247 {
1248     return &vq->guest_notifier;
1249 }
1250 
1251 static void virtio_queue_host_notifier_read(EventNotifier *n)
1252 {
1253     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1254     if (event_notifier_test_and_clear(n)) {
1255         virtio_queue_notify_vq(vq);
1256     }
1257 }
1258 
1259 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1260                                                bool set_handler)
1261 {
1262     if (assign && set_handler) {
1263         event_notifier_set_handler(&vq->host_notifier,
1264                                    virtio_queue_host_notifier_read);
1265     } else {
1266         event_notifier_set_handler(&vq->host_notifier, NULL);
1267     }
1268     if (!assign) {
1269         /* Test and clear notifier before after disabling event,
1270          * in case poll callback didn't have time to run. */
1271         virtio_queue_host_notifier_read(&vq->host_notifier);
1272     }
1273 }
1274 
1275 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1276 {
1277     return &vq->host_notifier;
1278 }
1279 
1280 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1281 {
1282     g_free(vdev->bus_name);
1283     vdev->bus_name = g_strdup(bus_name);
1284 }
1285 
1286 static void virtio_device_realize(DeviceState *dev, Error **errp)
1287 {
1288     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1289     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1290     Error *err = NULL;
1291 
1292     if (vdc->realize != NULL) {
1293         vdc->realize(dev, &err);
1294         if (err != NULL) {
1295             error_propagate(errp, err);
1296             return;
1297         }
1298     }
1299     virtio_bus_device_plugged(vdev);
1300 }
1301 
1302 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1303 {
1304     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1305     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1306     Error *err = NULL;
1307 
1308     virtio_bus_device_unplugged(vdev);
1309 
1310     if (vdc->unrealize != NULL) {
1311         vdc->unrealize(dev, &err);
1312         if (err != NULL) {
1313             error_propagate(errp, err);
1314             return;
1315         }
1316     }
1317 
1318     g_free(vdev->bus_name);
1319     vdev->bus_name = NULL;
1320 }
1321 
1322 static void virtio_device_class_init(ObjectClass *klass, void *data)
1323 {
1324     /* Set the default value here. */
1325     DeviceClass *dc = DEVICE_CLASS(klass);
1326 
1327     dc->realize = virtio_device_realize;
1328     dc->unrealize = virtio_device_unrealize;
1329     dc->bus_type = TYPE_VIRTIO_BUS;
1330 }
1331 
1332 static const TypeInfo virtio_device_info = {
1333     .name = TYPE_VIRTIO_DEVICE,
1334     .parent = TYPE_DEVICE,
1335     .instance_size = sizeof(VirtIODevice),
1336     .class_init = virtio_device_class_init,
1337     .abstract = true,
1338     .class_size = sizeof(VirtioDeviceClass),
1339 };
1340 
1341 static void virtio_register_types(void)
1342 {
1343     type_register_static(&virtio_device_info);
1344 }
1345 
1346 type_init(virtio_register_types)
1347