xref: /openbmc/qemu/hw/virtio/virtio.c (revision 93f7c4f0)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "exec/address-spaces.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/virtio.h"
20 #include "qemu/atomic.h"
21 #include "hw/virtio/virtio-bus.h"
22 #include "migration/migration.h"
23 #include "hw/virtio/virtio-access.h"
24 
25 /*
26  * The alignment to use between consumer and producer parts of vring.
27  * x86 pagesize again. This is the default, used by transports like PCI
28  * which don't provide a means for the guest to tell the host the alignment.
29  */
30 #define VIRTIO_PCI_VRING_ALIGN         4096
31 
/*
 * One entry of a descriptor table as it sits in guest memory.  This file
 * never maps the struct directly; the vring_desc_*() helpers use
 * sizeof()/offsetof() on it to compute guest-physical field addresses.
 */
typedef struct VRingDesc
{
    uint64_t addr;      /* fetched by vring_desc_addr() */
    uint32_t len;       /* fetched by vring_desc_len() */
    uint16_t flags;     /* VRING_DESC_F_* bits, fetched by vring_desc_flags() */
    uint16_t next;      /* chain link, fetched by vring_desc_next() */
} VRingDesc;
39 
/*
 * Header of the available ring as it sits in guest memory.  Only used for
 * offsetof() arithmetic: ring[n] locates the n-th 16-bit slot that follows
 * the two header fields.  The trailing array is a C99 flexible array
 * member, so sizeof() still covers just the header.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
46 
/*
 * One completion record in the used ring: the id and length words written
 * by vring_used_ring_id() and vring_used_ring_len().
 */
typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

/*
 * Header of the used ring as it sits in guest memory.  Only used for
 * offsetof() arithmetic.  The trailing array is a C99 flexible array
 * member, so sizeof() still covers just the header.
 */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;
59 
/*
 * Host-side description of where one ring lives.  virtqueue_init() fills
 * desc/avail/used from the queue's base address, num and align.
 */
typedef struct VRing
{
    unsigned int num;       /* ring size; 0 is treated as "queue not present" */
    unsigned int align;     /* alignment passed to vring_align() for the used ring */
    hwaddr desc;            /* address of the descriptor table */
    hwaddr avail;           /* address of the available ring */
    hwaddr used;            /* address of the used ring */
} VRing;
68 
/*
 * Per-queue state kept by the device side: where the ring lives plus the
 * bookkeeping this file maintains while consuming and completing buffers.
 */
struct VirtQueue
{
    VRing vring;
    /* Base address of the ring; virtqueue_init() derives vring.* from it */
    hwaddr pa;
    /* Next available-ring position to consume; advanced by virtqueue_pop() */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /*
     * Whether signalled_used may be trusted.  Set by vring_notify();
     * cleared by reset, load and (conditionally) virtqueue_flush().
     */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Incremented by virtqueue_pop(), reduced by virtqueue_flush() */
    int inuse;

    /* Vector handed to virtio_notify_vector() for this queue */
    uint16_t vector;
    /* Callback invoked by virtio_queue_notify_vq() */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
93 
94 /* virt queue functions */
95 static void virtqueue_init(VirtQueue *vq)
96 {
97     hwaddr pa = vq->pa;
98 
99     vq->vring.desc = pa;
100     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
101     vq->vring.used = vring_align(vq->vring.avail +
102                                  offsetof(VRingAvail, ring[vq->vring.num]),
103                                  vq->vring.align);
104 }
105 
106 static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
107                                        int i)
108 {
109     hwaddr pa;
110     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
111     return virtio_ldq_phys(vdev, pa);
112 }
113 
114 static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
115 {
116     hwaddr pa;
117     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
118     return virtio_ldl_phys(vdev, pa);
119 }
120 
121 static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
122                                         int i)
123 {
124     hwaddr pa;
125     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
126     return virtio_lduw_phys(vdev, pa);
127 }
128 
129 static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
130                                        int i)
131 {
132     hwaddr pa;
133     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
134     return virtio_lduw_phys(vdev, pa);
135 }
136 
137 static inline uint16_t vring_avail_flags(VirtQueue *vq)
138 {
139     hwaddr pa;
140     pa = vq->vring.avail + offsetof(VRingAvail, flags);
141     return virtio_lduw_phys(vq->vdev, pa);
142 }
143 
144 static inline uint16_t vring_avail_idx(VirtQueue *vq)
145 {
146     hwaddr pa;
147     pa = vq->vring.avail + offsetof(VRingAvail, idx);
148     return virtio_lduw_phys(vq->vdev, pa);
149 }
150 
151 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
152 {
153     hwaddr pa;
154     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
155     return virtio_lduw_phys(vq->vdev, pa);
156 }
157 
158 static inline uint16_t vring_get_used_event(VirtQueue *vq)
159 {
160     return vring_avail_ring(vq, vq->vring.num);
161 }
162 
163 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
164 {
165     hwaddr pa;
166     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
167     virtio_stl_phys(vq->vdev, pa, val);
168 }
169 
170 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
171 {
172     hwaddr pa;
173     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
174     virtio_stl_phys(vq->vdev, pa, val);
175 }
176 
177 static uint16_t vring_used_idx(VirtQueue *vq)
178 {
179     hwaddr pa;
180     pa = vq->vring.used + offsetof(VRingUsed, idx);
181     return virtio_lduw_phys(vq->vdev, pa);
182 }
183 
184 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
185 {
186     hwaddr pa;
187     pa = vq->vring.used + offsetof(VRingUsed, idx);
188     virtio_stw_phys(vq->vdev, pa, val);
189 }
190 
191 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
192 {
193     VirtIODevice *vdev = vq->vdev;
194     hwaddr pa;
195     pa = vq->vring.used + offsetof(VRingUsed, flags);
196     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
197 }
198 
199 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
200 {
201     VirtIODevice *vdev = vq->vdev;
202     hwaddr pa;
203     pa = vq->vring.used + offsetof(VRingUsed, flags);
204     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
205 }
206 
207 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
208 {
209     hwaddr pa;
210     if (!vq->notification) {
211         return;
212     }
213     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
214     virtio_stw_phys(vq->vdev, pa, val);
215 }
216 
217 void virtio_queue_set_notification(VirtQueue *vq, int enable)
218 {
219     vq->notification = enable;
220     if (virtio_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
221         vring_set_avail_event(vq, vring_avail_idx(vq));
222     } else if (enable) {
223         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
224     } else {
225         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
226     }
227     if (enable) {
228         /* Expose avail event/used flags before caller checks the avail idx. */
229         smp_mb();
230     }
231 }
232 
233 int virtio_queue_ready(VirtQueue *vq)
234 {
235     return vq->vring.avail != 0;
236 }
237 
238 int virtio_queue_empty(VirtQueue *vq)
239 {
240     return vring_avail_idx(vq) == vq->last_avail_idx;
241 }
242 
243 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
244                     unsigned int len, unsigned int idx)
245 {
246     unsigned int offset;
247     int i;
248 
249     trace_virtqueue_fill(vq, elem, len, idx);
250 
251     offset = 0;
252     for (i = 0; i < elem->in_num; i++) {
253         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
254 
255         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
256                                   elem->in_sg[i].iov_len,
257                                   1, size);
258 
259         offset += size;
260     }
261 
262     for (i = 0; i < elem->out_num; i++)
263         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
264                                   elem->out_sg[i].iov_len,
265                                   0, elem->out_sg[i].iov_len);
266 
267     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
268 
269     /* Get a pointer to the next entry in the used ring. */
270     vring_used_ring_id(vq, idx, elem->index);
271     vring_used_ring_len(vq, idx, len);
272 }
273 
274 void virtqueue_flush(VirtQueue *vq, unsigned int count)
275 {
276     uint16_t old, new;
277     /* Make sure buffer is written before we update index. */
278     smp_wmb();
279     trace_virtqueue_flush(vq, count);
280     old = vring_used_idx(vq);
281     new = old + count;
282     vring_used_idx_set(vq, new);
283     vq->inuse -= count;
284     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
285         vq->signalled_used_valid = false;
286 }
287 
/*
 * Complete a single element: record it at the current used position
 * (offset 0) with virtqueue_fill() and then advance the used index by one
 * with virtqueue_flush().
 */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
294 
295 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
296 {
297     uint16_t num_heads = vring_avail_idx(vq) - idx;
298 
299     /* Check it isn't doing very strange things with descriptor numbers. */
300     if (num_heads > vq->vring.num) {
301         error_report("Guest moved used index from %u to %u",
302                      idx, vring_avail_idx(vq));
303         exit(1);
304     }
305     /* On success, callers read a descriptor at vq->last_avail_idx.
306      * Make sure descriptor read does not bypass avail index read. */
307     if (num_heads) {
308         smp_rmb();
309     }
310 
311     return num_heads;
312 }
313 
314 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
315 {
316     unsigned int head;
317 
318     /* Grab the next descriptor number they're advertising, and increment
319      * the index we've seen. */
320     head = vring_avail_ring(vq, idx % vq->vring.num);
321 
322     /* If their number is silly, that's a fatal mistake. */
323     if (head >= vq->vring.num) {
324         error_report("Guest says index %u is available", head);
325         exit(1);
326     }
327 
328     return head;
329 }
330 
331 static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
332                                     unsigned int i, unsigned int max)
333 {
334     unsigned int next;
335 
336     /* If this descriptor says it doesn't chain, we're done. */
337     if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) {
338         return max;
339     }
340 
341     /* Check they're not leading us off end of descriptors. */
342     next = vring_desc_next(vdev, desc_pa, i);
343     /* Make sure compiler knows to grab that: we don't want it changing! */
344     smp_wmb();
345 
346     if (next >= max) {
347         error_report("Desc next is %u", next);
348         exit(1);
349     }
350 
351     return next;
352 }
353 
/*
 * Walk the chains that are available but not yet consumed and add up
 * their buffer lengths, without consuming anything: the walk uses a local
 * copy of last_avail_idx.
 *
 * Descriptors with VRING_DESC_F_WRITE count towards the "in" total, all
 * others towards the "out" total.  The walk stops early as soon as both
 * totals have reached @max_in_bytes and @max_out_bytes.
 *
 * @in_bytes / @out_bytes: optional outputs; either may be NULL.
 *
 * Malformed rings (bad indirect table size, descriptor loops) are fatal.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        /* Start each chain in the main table, carrying over the running
         * count of descriptors seen so far. */
        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
            /* The table length must be a whole number of descriptors. */
            if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(vdev, desc_pa, i);
            /* Counting and indexing restart inside the indirect table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(vdev, desc_pa, i);
            } else {
                out_total += vring_desc_len(vdev, desc_pa, i);
            }
            /* Both thresholds met: no need to look any further. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

        /* A direct chain contributes every descriptor it used to the
         * running count; an indirect one contributes a single entry. */
        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
424 
425 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
426                           unsigned int out_bytes)
427 {
428     unsigned int in_total, out_total;
429 
430     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
431     return in_bytes <= in_total && out_bytes <= out_total;
432 }
433 
434 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
435     size_t num_sg, int is_write)
436 {
437     unsigned int i;
438     hwaddr len;
439 
440     if (num_sg > VIRTQUEUE_MAX_SIZE) {
441         error_report("virtio: map attempt out of bounds: %zd > %d",
442                      num_sg, VIRTQUEUE_MAX_SIZE);
443         exit(1);
444     }
445 
446     for (i = 0; i < num_sg; i++) {
447         len = sg[i].iov_len;
448         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
449         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
450             error_report("virtio: error trying to map MMIO memory");
451             exit(1);
452         }
453     }
454 }
455 
/*
 * Take the next available chain off @vq and describe it in @elem.
 *
 * Returns 0 when nothing is available.  Otherwise fills elem->in_* with
 * the descriptors that have VRING_DESC_F_WRITE set and elem->out_* with
 * the others, maps all of them, stores the head descriptor number in
 * elem->index, bumps vq->inuse and returns the total descriptor count.
 *
 * Malformed chains (bad indirect size, too many descriptors, loops) and
 * mapping failures are fatal.
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Consume one available-ring slot; last_avail_idx moves on here. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
        /* The table length must be a whole number of descriptors. */
        if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(vdev, desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        /* NOTE(review): the two "in indirect table" messages below are
         * also reachable for chains in the main table. */
        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is recorded now; iov_base is set when mapping. */
        sg->iov_len = vring_desc_len(vdev, desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
527 
528 /* virtio device */
529 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
530 {
531     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
532     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
533 
534     if (k->notify) {
535         k->notify(qbus->parent, vector);
536     }
537 }
538 
/* Ask the bus to notify with VIRTIO_NO_VECTOR as the vector. */
void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
543 
/*
 * Change the device status byte to @val.  The device class's set_status
 * hook, when present, runs first and therefore still sees the previous
 * value in vdev->status.
 */
void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}
554 
555 bool target_words_bigendian(void);
556 static enum virtio_device_endian virtio_default_endian(void)
557 {
558     if (target_words_bigendian()) {
559         return VIRTIO_DEVICE_ENDIAN_BIG;
560     } else {
561         return VIRTIO_DEVICE_ENDIAN_LITTLE;
562     }
563 }
564 
565 static enum virtio_device_endian virtio_current_cpu_endian(void)
566 {
567     CPUClass *cc = CPU_GET_CLASS(current_cpu);
568 
569     if (cc->virtio_is_big_endian(current_cpu)) {
570         return VIRTIO_DEVICE_ENDIAN_BIG;
571     } else {
572         return VIRTIO_DEVICE_ENDIAN_LITTLE;
573     }
574 }
575 
576 void virtio_reset(void *opaque)
577 {
578     VirtIODevice *vdev = opaque;
579     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
580     int i;
581 
582     virtio_set_status(vdev, 0);
583     if (current_cpu) {
584         /* Guest initiated reset */
585         vdev->device_endian = virtio_current_cpu_endian();
586     } else {
587         /* System reset */
588         vdev->device_endian = virtio_default_endian();
589     }
590 
591     if (k->reset) {
592         k->reset(vdev);
593     }
594 
595     vdev->guest_features = 0;
596     vdev->queue_sel = 0;
597     vdev->status = 0;
598     vdev->isr = 0;
599     vdev->config_vector = VIRTIO_NO_VECTOR;
600     virtio_notify_vector(vdev, vdev->config_vector);
601 
602     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
603         vdev->vq[i].vring.desc = 0;
604         vdev->vq[i].vring.avail = 0;
605         vdev->vq[i].vring.used = 0;
606         vdev->vq[i].last_avail_idx = 0;
607         vdev->vq[i].pa = 0;
608         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
609         vdev->vq[i].signalled_used = 0;
610         vdev->vq[i].signalled_used_valid = false;
611         vdev->vq[i].notification = true;
612     }
613 }
614 
615 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
616 {
617     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618     uint8_t val;
619 
620     if (addr + sizeof(val) > vdev->config_len) {
621         return (uint32_t)-1;
622     }
623 
624     k->get_config(vdev, vdev->config);
625 
626     val = ldub_p(vdev->config + addr);
627     return val;
628 }
629 
630 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
631 {
632     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
633     uint16_t val;
634 
635     if (addr + sizeof(val) > vdev->config_len) {
636         return (uint32_t)-1;
637     }
638 
639     k->get_config(vdev, vdev->config);
640 
641     val = lduw_p(vdev->config + addr);
642     return val;
643 }
644 
645 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
646 {
647     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
648     uint32_t val;
649 
650     if (addr + sizeof(val) > vdev->config_len) {
651         return (uint32_t)-1;
652     }
653 
654     k->get_config(vdev, vdev->config);
655 
656     val = ldl_p(vdev->config + addr);
657     return val;
658 }
659 
660 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
661 {
662     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
663     uint8_t val = data;
664 
665     if (addr + sizeof(val) > vdev->config_len) {
666         return;
667     }
668 
669     stb_p(vdev->config + addr, val);
670 
671     if (k->set_config) {
672         k->set_config(vdev, vdev->config);
673     }
674 }
675 
676 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
677 {
678     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
679     uint16_t val = data;
680 
681     if (addr + sizeof(val) > vdev->config_len) {
682         return;
683     }
684 
685     stw_p(vdev->config + addr, val);
686 
687     if (k->set_config) {
688         k->set_config(vdev, vdev->config);
689     }
690 }
691 
692 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
693 {
694     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
695     uint32_t val = data;
696 
697     if (addr + sizeof(val) > vdev->config_len) {
698         return;
699     }
700 
701     stl_p(vdev->config + addr, val);
702 
703     if (k->set_config) {
704         k->set_config(vdev, vdev->config);
705     }
706 }
707 
708 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
709 {
710     vdev->vq[n].pa = addr;
711     virtqueue_init(&vdev->vq[n]);
712 }
713 
/* Return the base address stored for queue @n (no range check on @n). */
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}
718 
719 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
720 {
721     /* Don't allow guest to flip queue between existent and
722      * nonexistent states, or to set it to an invalid size.
723      */
724     if (!!num != !!vdev->vq[n].vring.num ||
725         num > VIRTQUEUE_MAX_SIZE ||
726         num < 0) {
727         return;
728     }
729     vdev->vq[n].vring.num = num;
730     virtqueue_init(&vdev->vq[n]);
731 }
732 
/* Return the ring size of queue @n (no range check on @n). */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}
737 
738 int virtio_queue_get_id(VirtQueue *vq)
739 {
740     VirtIODevice *vdev = vq->vdev;
741     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
742     return vq - &vdev->vq[0];
743 }
744 
745 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
746 {
747     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
748     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
749 
750     /* Check that the transport told us it was going to do this
751      * (so a buggy transport will immediately assert rather than
752      * silently failing to migrate this state)
753      */
754     assert(k->has_variable_vring_alignment);
755 
756     vdev->vq[n].vring.align = align;
757     virtqueue_init(&vdev->vq[n]);
758 }
759 
760 void virtio_queue_notify_vq(VirtQueue *vq)
761 {
762     if (vq->vring.desc) {
763         VirtIODevice *vdev = vq->vdev;
764         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
765         vq->handle_output(vdev, vq);
766     }
767 }
768 
/* Notify queue number @n of @vdev (no range check on @n). */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}
773 
774 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
775 {
776     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
777         VIRTIO_NO_VECTOR;
778 }
779 
780 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
781 {
782     if (n < VIRTIO_PCI_QUEUE_MAX)
783         vdev->vq[n].vector = vector;
784 }
785 
786 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
787                             void (*handle_output)(VirtIODevice *, VirtQueue *))
788 {
789     int i;
790 
791     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
792         if (vdev->vq[i].vring.num == 0)
793             break;
794     }
795 
796     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
797         abort();
798 
799     vdev->vq[i].vring.num = queue_size;
800     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
801     vdev->vq[i].handle_output = handle_output;
802 
803     return &vdev->vq[i];
804 }
805 
806 void virtio_del_queue(VirtIODevice *vdev, int n)
807 {
808     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
809         abort();
810     }
811 
812     vdev->vq[n].vring.num = 0;
813 }
814 
815 void virtio_irq(VirtQueue *vq)
816 {
817     trace_virtio_irq(vq);
818     vq->vdev->isr |= 0x01;
819     virtio_notify_vector(vq->vdev, vq->vector);
820 }
821 
/*
 * Decide whether the guest should be interrupted for @vq.
 *
 * In order:
 *  - with VIRTIO_F_NOTIFY_ON_EMPTY enabled, nothing in use and nothing
 *    left in the available ring: always notify;
 *  - without VIRTIO_RING_F_EVENT_IDX: notify unless the available ring's
 *    flags have VRING_AVAIL_F_NO_INTERRUPT set;
 *  - otherwise record the current used index as signalled_used and
 *    notify if the previously recorded value was not valid or
 *    vring_need_event() says so for the used-event value in the ring.
 *
 * Side effect on the last path: signalled_used and signalled_used_valid
 * are updated whether or not a notification results.
 */
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (when the feature was acknowledged) */
    if (virtio_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
        return true;
    }

    if (!virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* Remember the previous validity, then mark the cache valid again. */
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
844 
845 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
846 {
847     if (!vring_notify(vdev, vq)) {
848         return;
849     }
850 
851     trace_virtio_notify(vdev, vq);
852     vdev->isr |= 0x01;
853     virtio_notify_vector(vdev, vq->vector);
854 }
855 
856 void virtio_notify_config(VirtIODevice *vdev)
857 {
858     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
859         return;
860 
861     vdev->isr |= 0x03;
862     virtio_notify_vector(vdev, vdev->config_vector);
863 }
864 
865 static bool virtio_device_endian_needed(void *opaque)
866 {
867     VirtIODevice *vdev = opaque;
868 
869     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
870     return vdev->device_endian != virtio_default_endian();
871 }
872 
/*
 * Migration description of the "virtio/device_endian" subsection: a
 * single 8-bit field carrying VirtIODevice.device_endian.
 */
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};
882 
/*
 * Top-level "virtio" migration description.  It has no fields of its own
 * and exists to carry subsections; the only one is device_endian, which
 * is emitted when virtio_device_endian_needed() returns true.
 */
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_virtio_device_endian,
            .needed = &virtio_device_endian_needed
        },
        { 0 }
    }
};
899 
900 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
901 {
902     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
903     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
904     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
905     int i;
906 
907     if (k->save_config) {
908         k->save_config(qbus->parent, f);
909     }
910 
911     qemu_put_8s(f, &vdev->status);
912     qemu_put_8s(f, &vdev->isr);
913     qemu_put_be16s(f, &vdev->queue_sel);
914     qemu_put_be32s(f, &vdev->guest_features);
915     qemu_put_be32(f, vdev->config_len);
916     qemu_put_buffer(f, vdev->config, vdev->config_len);
917 
918     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
919         if (vdev->vq[i].vring.num == 0)
920             break;
921     }
922 
923     qemu_put_be32(f, i);
924 
925     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
926         if (vdev->vq[i].vring.num == 0)
927             break;
928 
929         qemu_put_be32(f, vdev->vq[i].vring.num);
930         if (k->has_variable_vring_alignment) {
931             qemu_put_be32(f, vdev->vq[i].vring.align);
932         }
933         qemu_put_be64(f, vdev->vq[i].pa);
934         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
935         if (k->save_queue) {
936             k->save_queue(qbus->parent, i, f);
937         }
938     }
939 
940     if (vdc->save != NULL) {
941         vdc->save(vdev, f);
942     }
943 
944     /* Subsections */
945     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
946 }
947 
948 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
949 {
950     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
951     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
952     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
953     uint32_t supported_features = vbusk->get_features(qbus->parent);
954     bool bad = (val & ~supported_features) != 0;
955 
956     val &= supported_features;
957     if (k->set_features) {
958         k->set_features(vdev, val);
959     }
960     vdev->guest_features = val;
961     return bad ? -1 : 0;
962 }
963 
964 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
965 {
966     int i, ret;
967     int32_t config_len;
968     uint32_t num;
969     uint32_t features;
970     uint32_t supported_features;
971     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
972     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
973     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
974 
975     /*
976      * We poison the endianness to ensure it does not get used before
977      * subsections have been loaded.
978      */
979     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
980 
981     if (k->load_config) {
982         ret = k->load_config(qbus->parent, f);
983         if (ret)
984             return ret;
985     }
986 
987     qemu_get_8s(f, &vdev->status);
988     qemu_get_8s(f, &vdev->isr);
989     qemu_get_be16s(f, &vdev->queue_sel);
990     if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
991         return -1;
992     }
993     qemu_get_be32s(f, &features);
994 
995     if (virtio_set_features(vdev, features) < 0) {
996         supported_features = k->get_features(qbus->parent);
997         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
998                      features, supported_features);
999         return -1;
1000     }
1001     config_len = qemu_get_be32(f);
1002 
1003     /*
1004      * There are cases where the incoming config can be bigger or smaller
1005      * than what we have; so load what we have space for, and skip
1006      * any excess that's in the stream.
1007      */
1008     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1009 
1010     while (config_len > vdev->config_len) {
1011         qemu_get_byte(f);
1012         config_len--;
1013     }
1014 
1015     num = qemu_get_be32(f);
1016 
1017     if (num > VIRTIO_PCI_QUEUE_MAX) {
1018         error_report("Invalid number of PCI queues: 0x%x", num);
1019         return -1;
1020     }
1021 
1022     for (i = 0; i < num; i++) {
1023         vdev->vq[i].vring.num = qemu_get_be32(f);
1024         if (k->has_variable_vring_alignment) {
1025             vdev->vq[i].vring.align = qemu_get_be32(f);
1026         }
1027         vdev->vq[i].pa = qemu_get_be64(f);
1028         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1029         vdev->vq[i].signalled_used_valid = false;
1030         vdev->vq[i].notification = true;
1031 
1032         if (vdev->vq[i].pa) {
1033             virtqueue_init(&vdev->vq[i]);
1034         } else if (vdev->vq[i].last_avail_idx) {
1035             error_report("VQ %d address 0x0 "
1036                          "inconsistent with Host index 0x%x",
1037                          i, vdev->vq[i].last_avail_idx);
1038                 return -1;
1039 	}
1040         if (k->load_queue) {
1041             ret = k->load_queue(qbus->parent, i, f);
1042             if (ret)
1043                 return ret;
1044         }
1045     }
1046 
1047     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1048 
1049     if (vdc->load != NULL) {
1050         ret = vdc->load(vdev, f, version_id);
1051         if (ret) {
1052             return ret;
1053         }
1054     }
1055 
1056     /* Subsections */
1057     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1058     if (ret) {
1059         return ret;
1060     }
1061 
1062     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1063         vdev->device_endian = virtio_default_endian();
1064     }
1065 
1066     for (i = 0; i < num; i++) {
1067         if (vdev->vq[i].pa) {
1068             uint16_t nheads;
1069             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1070             /* Check it isn't doing strange things with descriptor numbers. */
1071             if (nheads > vdev->vq[i].vring.num) {
1072                 error_report("VQ %d size 0x%x Guest index 0x%x "
1073                              "inconsistent with Host index 0x%x: delta 0x%x",
1074                              i, vdev->vq[i].vring.num,
1075                              vring_avail_idx(&vdev->vq[i]),
1076                              vdev->vq[i].last_avail_idx, nheads);
1077                 return -1;
1078             }
1079         }
1080     }
1081 
1082     return 0;
1083 }
1084 
1085 void virtio_cleanup(VirtIODevice *vdev)
1086 {
1087     qemu_del_vm_change_state_handler(vdev->vmstate);
1088     g_free(vdev->config);
1089     g_free(vdev->vq);
1090 }
1091 
1092 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1093 {
1094     VirtIODevice *vdev = opaque;
1095     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1096     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1097     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1098     vdev->vm_running = running;
1099 
1100     if (backend_run) {
1101         virtio_set_status(vdev, vdev->status);
1102     }
1103 
1104     if (k->vmstate_change) {
1105         k->vmstate_change(qbus->parent, backend_run);
1106     }
1107 
1108     if (!backend_run) {
1109         virtio_set_status(vdev, vdev->status);
1110     }
1111 }
1112 
1113 void virtio_instance_init_common(Object *proxy_obj, void *data,
1114                                  size_t vdev_size, const char *vdev_name)
1115 {
1116     DeviceState *vdev = data;
1117 
1118     object_initialize(vdev, vdev_size, vdev_name);
1119     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1120     object_unref(OBJECT(vdev));
1121     qdev_alias_all_properties(vdev, proxy_obj);
1122 }
1123 
1124 void virtio_init(VirtIODevice *vdev, const char *name,
1125                  uint16_t device_id, size_t config_size)
1126 {
1127     int i;
1128     vdev->device_id = device_id;
1129     vdev->status = 0;
1130     vdev->isr = 0;
1131     vdev->queue_sel = 0;
1132     vdev->config_vector = VIRTIO_NO_VECTOR;
1133     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
1134     vdev->vm_running = runstate_is_running();
1135     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
1136         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1137         vdev->vq[i].vdev = vdev;
1138         vdev->vq[i].queue_index = i;
1139     }
1140 
1141     vdev->name = name;
1142     vdev->config_len = config_size;
1143     if (vdev->config_len) {
1144         vdev->config = g_malloc0(config_size);
1145     } else {
1146         vdev->config = NULL;
1147     }
1148     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1149                                                      vdev);
1150     vdev->device_endian = virtio_default_endian();
1151 }
1152 
1153 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1154 {
1155     return vdev->vq[n].vring.desc;
1156 }
1157 
1158 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1159 {
1160     return vdev->vq[n].vring.avail;
1161 }
1162 
1163 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1164 {
1165     return vdev->vq[n].vring.used;
1166 }
1167 
1168 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1169 {
1170     return vdev->vq[n].vring.desc;
1171 }
1172 
1173 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1174 {
1175     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1176 }
1177 
1178 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1179 {
1180     return offsetof(VRingAvail, ring) +
1181         sizeof(uint64_t) * vdev->vq[n].vring.num;
1182 }
1183 
1184 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1185 {
1186     return offsetof(VRingUsed, ring) +
1187         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1188 }
1189 
1190 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1191 {
1192     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1193 	    virtio_queue_get_used_size(vdev, n);
1194 }
1195 
1196 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1197 {
1198     return vdev->vq[n].last_avail_idx;
1199 }
1200 
1201 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1202 {
1203     vdev->vq[n].last_avail_idx = idx;
1204 }
1205 
1206 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1207 {
1208     vdev->vq[n].signalled_used_valid = false;
1209 }
1210 
1211 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1212 {
1213     return vdev->vq + n;
1214 }
1215 
1216 uint16_t virtio_get_queue_index(VirtQueue *vq)
1217 {
1218     return vq->queue_index;
1219 }
1220 
1221 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1222 {
1223     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1224     if (event_notifier_test_and_clear(n)) {
1225         virtio_irq(vq);
1226     }
1227 }
1228 
1229 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1230                                                 bool with_irqfd)
1231 {
1232     if (assign && !with_irqfd) {
1233         event_notifier_set_handler(&vq->guest_notifier,
1234                                    virtio_queue_guest_notifier_read);
1235     } else {
1236         event_notifier_set_handler(&vq->guest_notifier, NULL);
1237     }
1238     if (!assign) {
1239         /* Test and clear notifier before closing it,
1240          * in case poll callback didn't have time to run. */
1241         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1242     }
1243 }
1244 
1245 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1246 {
1247     return &vq->guest_notifier;
1248 }
1249 
1250 static void virtio_queue_host_notifier_read(EventNotifier *n)
1251 {
1252     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1253     if (event_notifier_test_and_clear(n)) {
1254         virtio_queue_notify_vq(vq);
1255     }
1256 }
1257 
1258 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1259                                                bool set_handler)
1260 {
1261     if (assign && set_handler) {
1262         event_notifier_set_handler(&vq->host_notifier,
1263                                    virtio_queue_host_notifier_read);
1264     } else {
1265         event_notifier_set_handler(&vq->host_notifier, NULL);
1266     }
1267     if (!assign) {
1268         /* Test and clear notifier before after disabling event,
1269          * in case poll callback didn't have time to run. */
1270         virtio_queue_host_notifier_read(&vq->host_notifier);
1271     }
1272 }
1273 
1274 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1275 {
1276     return &vq->host_notifier;
1277 }
1278 
1279 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1280 {
1281     g_free(vdev->bus_name);
1282     vdev->bus_name = g_strdup(bus_name);
1283 }
1284 
1285 static void virtio_device_realize(DeviceState *dev, Error **errp)
1286 {
1287     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1288     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1289     Error *err = NULL;
1290 
1291     if (vdc->realize != NULL) {
1292         vdc->realize(dev, &err);
1293         if (err != NULL) {
1294             error_propagate(errp, err);
1295             return;
1296         }
1297     }
1298     virtio_bus_device_plugged(vdev);
1299 }
1300 
1301 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1302 {
1303     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1304     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1305     Error *err = NULL;
1306 
1307     virtio_bus_device_unplugged(vdev);
1308 
1309     if (vdc->unrealize != NULL) {
1310         vdc->unrealize(dev, &err);
1311         if (err != NULL) {
1312             error_propagate(errp, err);
1313             return;
1314         }
1315     }
1316 
1317     g_free(vdev->bus_name);
1318     vdev->bus_name = NULL;
1319 }
1320 
1321 static void virtio_device_class_init(ObjectClass *klass, void *data)
1322 {
1323     /* Set the default value here. */
1324     DeviceClass *dc = DEVICE_CLASS(klass);
1325 
1326     dc->realize = virtio_device_realize;
1327     dc->unrealize = virtio_device_unrealize;
1328     dc->bus_type = TYPE_VIRTIO_BUS;
1329 }
1330 
1331 static const TypeInfo virtio_device_info = {
1332     .name = TYPE_VIRTIO_DEVICE,
1333     .parent = TYPE_DEVICE,
1334     .instance_size = sizeof(VirtIODevice),
1335     .class_init = virtio_device_class_init,
1336     .abstract = true,
1337     .class_size = sizeof(VirtioDeviceClass),
1338 };
1339 
1340 static void virtio_register_types(void)
1341 {
1342     type_register_static(&virtio_device_info);
1343 }
1344 
1345 type_init(virtio_register_types)
1346