xref: /openbmc/qemu/hw/virtio/virtio.c (revision 1d300b5f)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "hw/virtio/virtio.h"
19 #include "qemu/atomic.h"
20 #include "hw/virtio/virtio-bus.h"
21 
/*
 * Default alignment, in bytes, to use between the consumer and producer
 * parts of the vring (4096, the x86 page size).  This is the default, used
 * by transports like PCI which don't provide a means for the guest to tell
 * the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
28 
/*
 * Layout of one descriptor table entry.  The structure is only used in this
 * file for sizeof()/offsetof() when computing addresses of individual fields.
 */
typedef struct VRingDesc
{
    /* Buffer address; for an INDIRECT entry, address of the indirect table */
    uint64_t addr;
    /* Buffer length in bytes; for an INDIRECT entry, size of that table */
    uint32_t len;
    /* VRING_DESC_F_* bits (NEXT, WRITE and INDIRECT are tested in this file) */
    uint16_t flags;
    /* Index of the next descriptor in the chain when VRING_DESC_F_NEXT is set */
    uint16_t next;
} VRingDesc;
36 
/*
 * Layout of the available ring header followed by its entries.
 * Only used for offsetof() computations in this file.
 */
typedef struct VRingAvail
{
    /* Flag bits; VRING_AVAIL_F_NO_INTERRUPT is tested by vring_notify() */
    uint16_t flags;
    /* Running index, compared against last_avail_idx to find new heads */
    uint16_t idx;
    /* Head descriptor indices; slot [num] is read by vring_used_event() */
    uint16_t ring[0];
} VRingAvail;
43 
/* Layout of one used ring entry, as written by virtqueue_fill(). */
typedef struct VRingUsedElem
{
    /* Head index of the completed element (elem->index) */
    uint32_t id;
    /* Length value passed to virtqueue_fill() for this element */
    uint32_t len;
} VRingUsedElem;
49 
/*
 * Layout of the used ring header followed by its entries.
 * Only used for offsetof() computations in this file.
 */
typedef struct VRingUsed
{
    /* Flag bits; VRING_USED_F_NO_NOTIFY is set/cleared in this file */
    uint16_t flags;
    /* Running index, advanced by virtqueue_flush() */
    uint16_t idx;
    /* Used entries; the slot at [num] is written by vring_avail_event() */
    VRingUsedElem ring[0];
} VRingUsed;
56 
/* Size, alignment and the three derived addresses of one vring. */
typedef struct VRing
{
    /* Number of entries in the ring; 0 is treated as "queue not present" */
    unsigned int num;
    /* Alignment passed to vring_align() when placing the used ring */
    unsigned int align;
    /* Address of the descriptor table (equals VirtQueue.pa) */
    hwaddr desc;
    /* Address of the available ring */
    hwaddr avail;
    /* Address of the used ring */
    hwaddr used;
} VRing;
65 
/* Per-queue state kept by the device side. */
struct VirtQueue
{
    /* Ring geometry and addresses, recomputed by virtqueue_init() */
    VRing vring;
    /* Base address of the ring; 0 is treated as "not set up" on load */
    hwaddr pa;
    /* Next available-ring index to be consumed by virtqueue_pop() */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used holds a usable value (see vring_notify()) */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    /* Index of this queue within vdev->vq[], set by virtio_init() */
    uint16_t queue_index;

    /* Elements popped but not yet flushed (++ in pop, -= in flush) */
    int inuse;

    /* Vector passed to the bus notify hook for this queue */
    uint16_t vector;
    /* Callback invoked by virtio_queue_notify_vq() */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    /* Owning device */
    VirtIODevice *vdev;
    /* Notifier whose events are turned into virtio_irq() calls */
    EventNotifier guest_notifier;
    /* NOTE(review): usage is outside the visible part of the file */
    EventNotifier host_notifier;
};
90 
91 /* virt queue functions */
92 static void virtqueue_init(VirtQueue *vq)
93 {
94     hwaddr pa = vq->pa;
95 
96     vq->vring.desc = pa;
97     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
98     vq->vring.used = vring_align(vq->vring.avail +
99                                  offsetof(VRingAvail, ring[vq->vring.num]),
100                                  vq->vring.align);
101 }
102 
103 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
104 {
105     hwaddr pa;
106     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
107     return ldq_phys(pa);
108 }
109 
110 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
111 {
112     hwaddr pa;
113     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
114     return ldl_phys(pa);
115 }
116 
117 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
118 {
119     hwaddr pa;
120     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
121     return lduw_phys(pa);
122 }
123 
124 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
125 {
126     hwaddr pa;
127     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
128     return lduw_phys(pa);
129 }
130 
131 static inline uint16_t vring_avail_flags(VirtQueue *vq)
132 {
133     hwaddr pa;
134     pa = vq->vring.avail + offsetof(VRingAvail, flags);
135     return lduw_phys(pa);
136 }
137 
138 static inline uint16_t vring_avail_idx(VirtQueue *vq)
139 {
140     hwaddr pa;
141     pa = vq->vring.avail + offsetof(VRingAvail, idx);
142     return lduw_phys(pa);
143 }
144 
145 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
146 {
147     hwaddr pa;
148     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
149     return lduw_phys(pa);
150 }
151 
152 static inline uint16_t vring_used_event(VirtQueue *vq)
153 {
154     return vring_avail_ring(vq, vq->vring.num);
155 }
156 
157 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
158 {
159     hwaddr pa;
160     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
161     stl_phys(pa, val);
162 }
163 
164 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
165 {
166     hwaddr pa;
167     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
168     stl_phys(pa, val);
169 }
170 
171 static uint16_t vring_used_idx(VirtQueue *vq)
172 {
173     hwaddr pa;
174     pa = vq->vring.used + offsetof(VRingUsed, idx);
175     return lduw_phys(pa);
176 }
177 
178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
179 {
180     hwaddr pa;
181     pa = vq->vring.used + offsetof(VRingUsed, idx);
182     stw_phys(pa, val);
183 }
184 
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
186 {
187     hwaddr pa;
188     pa = vq->vring.used + offsetof(VRingUsed, flags);
189     stw_phys(pa, lduw_phys(pa) | mask);
190 }
191 
192 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
193 {
194     hwaddr pa;
195     pa = vq->vring.used + offsetof(VRingUsed, flags);
196     stw_phys(pa, lduw_phys(pa) & ~mask);
197 }
198 
199 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
200 {
201     hwaddr pa;
202     if (!vq->notification) {
203         return;
204     }
205     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
206     stw_phys(pa, val);
207 }
208 
209 void virtio_queue_set_notification(VirtQueue *vq, int enable)
210 {
211     vq->notification = enable;
212     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
213         vring_avail_event(vq, vring_avail_idx(vq));
214     } else if (enable) {
215         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
216     } else {
217         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
218     }
219     if (enable) {
220         /* Expose avail event/used flags before caller checks the avail idx. */
221         smp_mb();
222     }
223 }
224 
225 int virtio_queue_ready(VirtQueue *vq)
226 {
227     return vq->vring.avail != 0;
228 }
229 
230 int virtio_queue_empty(VirtQueue *vq)
231 {
232     return vring_avail_idx(vq) == vq->last_avail_idx;
233 }
234 
235 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
236                     unsigned int len, unsigned int idx)
237 {
238     unsigned int offset;
239     int i;
240 
241     trace_virtqueue_fill(vq, elem, len, idx);
242 
243     offset = 0;
244     for (i = 0; i < elem->in_num; i++) {
245         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
246 
247         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
248                                   elem->in_sg[i].iov_len,
249                                   1, size);
250 
251         offset += size;
252     }
253 
254     for (i = 0; i < elem->out_num; i++)
255         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
256                                   elem->out_sg[i].iov_len,
257                                   0, elem->out_sg[i].iov_len);
258 
259     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
260 
261     /* Get a pointer to the next entry in the used ring. */
262     vring_used_ring_id(vq, idx, elem->index);
263     vring_used_ring_len(vq, idx, len);
264 }
265 
266 void virtqueue_flush(VirtQueue *vq, unsigned int count)
267 {
268     uint16_t old, new;
269     /* Make sure buffer is written before we update index. */
270     smp_wmb();
271     trace_virtqueue_flush(vq, count);
272     old = vring_used_idx(vq);
273     new = old + count;
274     vring_used_idx_set(vq, new);
275     vq->inuse -= count;
276     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
277         vq->signalled_used_valid = false;
278 }
279 
280 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
281                     unsigned int len)
282 {
283     virtqueue_fill(vq, elem, len, 0);
284     virtqueue_flush(vq, 1);
285 }
286 
287 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
288 {
289     uint16_t num_heads = vring_avail_idx(vq) - idx;
290 
291     /* Check it isn't doing very strange things with descriptor numbers. */
292     if (num_heads > vq->vring.num) {
293         error_report("Guest moved used index from %u to %u",
294                      idx, vring_avail_idx(vq));
295         exit(1);
296     }
297     /* On success, callers read a descriptor at vq->last_avail_idx.
298      * Make sure descriptor read does not bypass avail index read. */
299     if (num_heads) {
300         smp_rmb();
301     }
302 
303     return num_heads;
304 }
305 
306 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
307 {
308     unsigned int head;
309 
310     /* Grab the next descriptor number they're advertising, and increment
311      * the index we've seen. */
312     head = vring_avail_ring(vq, idx % vq->vring.num);
313 
314     /* If their number is silly, that's a fatal mistake. */
315     if (head >= vq->vring.num) {
316         error_report("Guest says index %u is available", head);
317         exit(1);
318     }
319 
320     return head;
321 }
322 
323 static unsigned virtqueue_next_desc(hwaddr desc_pa,
324                                     unsigned int i, unsigned int max)
325 {
326     unsigned int next;
327 
328     /* If this descriptor says it doesn't chain, we're done. */
329     if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
330         return max;
331 
332     /* Check they're not leading us off end of descriptors. */
333     next = vring_desc_next(desc_pa, i);
334     /* Make sure compiler knows to grab that: we don't want it changing! */
335     smp_wmb();
336 
337     if (next >= max) {
338         error_report("Desc next is %u", next);
339         exit(1);
340     }
341 
342     return next;
343 }
344 
345 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
346                                unsigned int *out_bytes,
347                                unsigned max_in_bytes, unsigned max_out_bytes)
348 {
349     unsigned int idx;
350     unsigned int total_bufs, in_total, out_total;
351 
352     idx = vq->last_avail_idx;
353 
354     total_bufs = in_total = out_total = 0;
355     while (virtqueue_num_heads(vq, idx)) {
356         unsigned int max, num_bufs, indirect = 0;
357         hwaddr desc_pa;
358         int i;
359 
360         max = vq->vring.num;
361         num_bufs = total_bufs;
362         i = virtqueue_get_head(vq, idx++);
363         desc_pa = vq->vring.desc;
364 
365         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
366             if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
367                 error_report("Invalid size for indirect buffer table");
368                 exit(1);
369             }
370 
371             /* If we've got too many, that implies a descriptor loop. */
372             if (num_bufs >= max) {
373                 error_report("Looped descriptor");
374                 exit(1);
375             }
376 
377             /* loop over the indirect descriptor table */
378             indirect = 1;
379             max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
380             num_bufs = i = 0;
381             desc_pa = vring_desc_addr(desc_pa, i);
382         }
383 
384         do {
385             /* If we've got too many, that implies a descriptor loop. */
386             if (++num_bufs > max) {
387                 error_report("Looped descriptor");
388                 exit(1);
389             }
390 
391             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
392                 in_total += vring_desc_len(desc_pa, i);
393             } else {
394                 out_total += vring_desc_len(desc_pa, i);
395             }
396             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
397                 goto done;
398             }
399         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
400 
401         if (!indirect)
402             total_bufs = num_bufs;
403         else
404             total_bufs++;
405     }
406 done:
407     if (in_bytes) {
408         *in_bytes = in_total;
409     }
410     if (out_bytes) {
411         *out_bytes = out_total;
412     }
413 }
414 
415 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
416                           unsigned int out_bytes)
417 {
418     unsigned int in_total, out_total;
419 
420     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
421     return in_bytes <= in_total && out_bytes <= out_total;
422 }
423 
424 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
425     size_t num_sg, int is_write)
426 {
427     unsigned int i;
428     hwaddr len;
429 
430     for (i = 0; i < num_sg; i++) {
431         len = sg[i].iov_len;
432         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
433         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
434             error_report("virtio: trying to map MMIO memory");
435             exit(1);
436         }
437     }
438 }
439 
/*
 * Take the next available element off @vq and describe it in @elem.
 *
 * Returns 0 when no head is available.  Otherwise the descriptor chain
 * (or indirect table) of the next head is walked, the buffers are split
 * into elem->in_sg (VRING_DESC_F_WRITE set) and elem->out_sg (not set),
 * all of them are mapped, and the total number of buffers is returned.
 * vq->last_avail_idx and vq->inuse are advanced as a side effect.
 *
 * Malformed rings terminate the process via exit(1).
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Consume one slot of the available ring. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        /* Refresh the avail event slot with the current avail index. */
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            /* Bounds check before storing into in_addr[]/in_sg[]. */
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            /* Bounds check before storing into out_addr[]/out_sg[]. */
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is known so far; iov_base is set when mapping. */
        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    /* The head index is what virtqueue_fill() later writes as the used id. */
    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
510 
511 /* virtio device */
512 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
513 {
514     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
515     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
516 
517     if (k->notify) {
518         k->notify(qbus->parent, vector);
519     }
520 }
521 
522 void virtio_update_irq(VirtIODevice *vdev)
523 {
524     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
525 }
526 
527 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
528 {
529     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
530     trace_virtio_set_status(vdev, val);
531 
532     if (k->set_status) {
533         k->set_status(vdev, val);
534     }
535     vdev->status = val;
536 }
537 
538 void virtio_reset(void *opaque)
539 {
540     VirtIODevice *vdev = opaque;
541     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
542     int i;
543 
544     virtio_set_status(vdev, 0);
545 
546     if (k->reset) {
547         k->reset(vdev);
548     }
549 
550     vdev->guest_features = 0;
551     vdev->queue_sel = 0;
552     vdev->status = 0;
553     vdev->isr = 0;
554     vdev->config_vector = VIRTIO_NO_VECTOR;
555     virtio_notify_vector(vdev, vdev->config_vector);
556 
557     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
558         vdev->vq[i].vring.desc = 0;
559         vdev->vq[i].vring.avail = 0;
560         vdev->vq[i].vring.used = 0;
561         vdev->vq[i].last_avail_idx = 0;
562         vdev->vq[i].pa = 0;
563         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
564         vdev->vq[i].signalled_used = 0;
565         vdev->vq[i].signalled_used_valid = false;
566         vdev->vq[i].notification = true;
567     }
568 }
569 
570 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
571 {
572     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
573     uint8_t val;
574 
575     if (addr + sizeof(val) > vdev->config_len) {
576         return (uint32_t)-1;
577     }
578 
579     k->get_config(vdev, vdev->config);
580 
581     val = ldub_p(vdev->config + addr);
582     return val;
583 }
584 
585 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
586 {
587     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
588     uint16_t val;
589 
590     if (addr + sizeof(val) > vdev->config_len) {
591         return (uint32_t)-1;
592     }
593 
594     k->get_config(vdev, vdev->config);
595 
596     val = lduw_p(vdev->config + addr);
597     return val;
598 }
599 
600 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
601 {
602     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
603     uint32_t val;
604 
605     if (addr + sizeof(val) > vdev->config_len) {
606         return (uint32_t)-1;
607     }
608 
609     k->get_config(vdev, vdev->config);
610 
611     val = ldl_p(vdev->config + addr);
612     return val;
613 }
614 
615 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
616 {
617     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618     uint8_t val = data;
619 
620     if (addr + sizeof(val) > vdev->config_len) {
621         return;
622     }
623 
624     stb_p(vdev->config + addr, val);
625 
626     if (k->set_config) {
627         k->set_config(vdev, vdev->config);
628     }
629 }
630 
631 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
632 {
633     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
634     uint16_t val = data;
635 
636     if (addr + sizeof(val) > vdev->config_len) {
637         return;
638     }
639 
640     stw_p(vdev->config + addr, val);
641 
642     if (k->set_config) {
643         k->set_config(vdev, vdev->config);
644     }
645 }
646 
647 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
648 {
649     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
650     uint32_t val = data;
651 
652     if (addr + sizeof(val) > vdev->config_len) {
653         return;
654     }
655 
656     stl_p(vdev->config + addr, val);
657 
658     if (k->set_config) {
659         k->set_config(vdev, vdev->config);
660     }
661 }
662 
663 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
664 {
665     vdev->vq[n].pa = addr;
666     virtqueue_init(&vdev->vq[n]);
667 }
668 
669 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
670 {
671     return vdev->vq[n].pa;
672 }
673 
674 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
675 {
676     if (num <= VIRTQUEUE_MAX_SIZE) {
677         vdev->vq[n].vring.num = num;
678         virtqueue_init(&vdev->vq[n]);
679     }
680 }
681 
682 int virtio_queue_get_num(VirtIODevice *vdev, int n)
683 {
684     return vdev->vq[n].vring.num;
685 }
686 
687 int virtio_queue_get_id(VirtQueue *vq)
688 {
689     VirtIODevice *vdev = vq->vdev;
690     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
691     return vq - &vdev->vq[0];
692 }
693 
694 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
695 {
696     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
697     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
698 
699     /* Check that the transport told us it was going to do this
700      * (so a buggy transport will immediately assert rather than
701      * silently failing to migrate this state)
702      */
703     assert(k->has_variable_vring_alignment);
704 
705     vdev->vq[n].vring.align = align;
706     virtqueue_init(&vdev->vq[n]);
707 }
708 
709 void virtio_queue_notify_vq(VirtQueue *vq)
710 {
711     if (vq->vring.desc) {
712         VirtIODevice *vdev = vq->vdev;
713         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
714         vq->handle_output(vdev, vq);
715     }
716 }
717 
718 void virtio_queue_notify(VirtIODevice *vdev, int n)
719 {
720     virtio_queue_notify_vq(&vdev->vq[n]);
721 }
722 
723 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
724 {
725     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
726         VIRTIO_NO_VECTOR;
727 }
728 
729 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
730 {
731     if (n < VIRTIO_PCI_QUEUE_MAX)
732         vdev->vq[n].vector = vector;
733 }
734 
735 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
736                             void (*handle_output)(VirtIODevice *, VirtQueue *))
737 {
738     int i;
739 
740     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
741         if (vdev->vq[i].vring.num == 0)
742             break;
743     }
744 
745     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
746         abort();
747 
748     vdev->vq[i].vring.num = queue_size;
749     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
750     vdev->vq[i].handle_output = handle_output;
751 
752     return &vdev->vq[i];
753 }
754 
755 void virtio_del_queue(VirtIODevice *vdev, int n)
756 {
757     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
758         abort();
759     }
760 
761     vdev->vq[n].vring.num = 0;
762 }
763 
764 void virtio_irq(VirtQueue *vq)
765 {
766     trace_virtio_irq(vq);
767     vq->vdev->isr |= 0x01;
768     virtio_notify_vector(vq->vdev, vq->vector);
769 }
770 
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new,
 * should we trigger an event?
 *
 * Returns non-zero when event lies in the 16-bit wrapping interval
 * [old, new - 1], i.e. the index has just moved past event. */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = new - event - 1;
    const uint16_t advanced = new - old;

    return past_event < advanced;
}
783 
784 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
785 {
786     uint16_t old, new;
787     bool v;
788     /* We need to expose used array entries before checking used event. */
789     smp_mb();
790     /* Always notify when queue is empty (when feature acknowledge) */
791     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
792          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
793         return true;
794     }
795 
796     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
797         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
798     }
799 
800     v = vq->signalled_used_valid;
801     vq->signalled_used_valid = true;
802     old = vq->signalled_used;
803     new = vq->signalled_used = vring_used_idx(vq);
804     return !v || vring_need_event(vring_used_event(vq), new, old);
805 }
806 
807 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
808 {
809     if (!vring_notify(vdev, vq)) {
810         return;
811     }
812 
813     trace_virtio_notify(vdev, vq);
814     vdev->isr |= 0x01;
815     virtio_notify_vector(vdev, vq->vector);
816 }
817 
818 void virtio_notify_config(VirtIODevice *vdev)
819 {
820     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
821         return;
822 
823     vdev->isr |= 0x03;
824     virtio_notify_vector(vdev, vdev->config_vector);
825 }
826 
827 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
828 {
829     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
830     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
831     int i;
832 
833     if (k->save_config) {
834         k->save_config(qbus->parent, f);
835     }
836 
837     qemu_put_8s(f, &vdev->status);
838     qemu_put_8s(f, &vdev->isr);
839     qemu_put_be16s(f, &vdev->queue_sel);
840     qemu_put_be32s(f, &vdev->guest_features);
841     qemu_put_be32(f, vdev->config_len);
842     qemu_put_buffer(f, vdev->config, vdev->config_len);
843 
844     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
845         if (vdev->vq[i].vring.num == 0)
846             break;
847     }
848 
849     qemu_put_be32(f, i);
850 
851     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
852         if (vdev->vq[i].vring.num == 0)
853             break;
854 
855         qemu_put_be32(f, vdev->vq[i].vring.num);
856         if (k->has_variable_vring_alignment) {
857             qemu_put_be32(f, vdev->vq[i].vring.align);
858         }
859         qemu_put_be64(f, vdev->vq[i].pa);
860         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
861         if (k->save_queue) {
862             k->save_queue(qbus->parent, i, f);
863         }
864     }
865 }
866 
867 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
868 {
869     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
870     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
871     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
872     uint32_t supported_features = vbusk->get_features(qbus->parent);
873     bool bad = (val & ~supported_features) != 0;
874 
875     val &= supported_features;
876     if (k->set_features) {
877         k->set_features(vdev, val);
878     }
879     vdev->guest_features = val;
880     return bad ? -1 : 0;
881 }
882 
883 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
884 {
885     int num, i, ret;
886     uint32_t features;
887     uint32_t supported_features;
888     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
889     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
890 
891     if (k->load_config) {
892         ret = k->load_config(qbus->parent, f);
893         if (ret)
894             return ret;
895     }
896 
897     qemu_get_8s(f, &vdev->status);
898     qemu_get_8s(f, &vdev->isr);
899     qemu_get_be16s(f, &vdev->queue_sel);
900     qemu_get_be32s(f, &features);
901 
902     if (virtio_set_features(vdev, features) < 0) {
903         supported_features = k->get_features(qbus->parent);
904         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
905                      features, supported_features);
906         return -1;
907     }
908     vdev->config_len = qemu_get_be32(f);
909     qemu_get_buffer(f, vdev->config, vdev->config_len);
910 
911     num = qemu_get_be32(f);
912 
913     for (i = 0; i < num; i++) {
914         vdev->vq[i].vring.num = qemu_get_be32(f);
915         if (k->has_variable_vring_alignment) {
916             vdev->vq[i].vring.align = qemu_get_be32(f);
917         }
918         vdev->vq[i].pa = qemu_get_be64(f);
919         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
920         vdev->vq[i].signalled_used_valid = false;
921         vdev->vq[i].notification = true;
922 
923         if (vdev->vq[i].pa) {
924             uint16_t nheads;
925             virtqueue_init(&vdev->vq[i]);
926             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
927             /* Check it isn't doing very strange things with descriptor numbers. */
928             if (nheads > vdev->vq[i].vring.num) {
929                 error_report("VQ %d size 0x%x Guest index 0x%x "
930                              "inconsistent with Host index 0x%x: delta 0x%x",
931                              i, vdev->vq[i].vring.num,
932                              vring_avail_idx(&vdev->vq[i]),
933                              vdev->vq[i].last_avail_idx, nheads);
934                 return -1;
935             }
936         } else if (vdev->vq[i].last_avail_idx) {
937             error_report("VQ %d address 0x0 "
938                          "inconsistent with Host index 0x%x",
939                          i, vdev->vq[i].last_avail_idx);
940                 return -1;
941 	}
942         if (k->load_queue) {
943             ret = k->load_queue(qbus->parent, i, f);
944             if (ret)
945                 return ret;
946         }
947     }
948 
949     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
950     return 0;
951 }
952 
953 void virtio_cleanup(VirtIODevice *vdev)
954 {
955     qemu_del_vm_change_state_handler(vdev->vmstate);
956     g_free(vdev->config);
957     g_free(vdev->vq);
958 }
959 
960 static void virtio_vmstate_change(void *opaque, int running, RunState state)
961 {
962     VirtIODevice *vdev = opaque;
963     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
964     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
965     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
966     vdev->vm_running = running;
967 
968     if (backend_run) {
969         virtio_set_status(vdev, vdev->status);
970     }
971 
972     if (k->vmstate_change) {
973         k->vmstate_change(qbus->parent, backend_run);
974     }
975 
976     if (!backend_run) {
977         virtio_set_status(vdev, vdev->status);
978     }
979 }
980 
981 void virtio_init(VirtIODevice *vdev, const char *name,
982                  uint16_t device_id, size_t config_size)
983 {
984     int i;
985     vdev->device_id = device_id;
986     vdev->status = 0;
987     vdev->isr = 0;
988     vdev->queue_sel = 0;
989     vdev->config_vector = VIRTIO_NO_VECTOR;
990     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
991     vdev->vm_running = runstate_is_running();
992     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
993         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
994         vdev->vq[i].vdev = vdev;
995         vdev->vq[i].queue_index = i;
996     }
997 
998     vdev->name = name;
999     vdev->config_len = config_size;
1000     if (vdev->config_len) {
1001         vdev->config = g_malloc0(config_size);
1002     } else {
1003         vdev->config = NULL;
1004     }
1005     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1006                                                      vdev);
1007 }
1008 
1009 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1010 {
1011     return vdev->vq[n].vring.desc;
1012 }
1013 
1014 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1015 {
1016     return vdev->vq[n].vring.avail;
1017 }
1018 
1019 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1020 {
1021     return vdev->vq[n].vring.used;
1022 }
1023 
1024 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1025 {
1026     return vdev->vq[n].vring.desc;
1027 }
1028 
1029 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1030 {
1031     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1032 }
1033 
1034 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1035 {
1036     return offsetof(VRingAvail, ring) +
1037         sizeof(uint64_t) * vdev->vq[n].vring.num;
1038 }
1039 
1040 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1041 {
1042     return offsetof(VRingUsed, ring) +
1043         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1044 }
1045 
1046 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1047 {
1048     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1049 	    virtio_queue_get_used_size(vdev, n);
1050 }
1051 
1052 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1053 {
1054     return vdev->vq[n].last_avail_idx;
1055 }
1056 
1057 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1058 {
1059     vdev->vq[n].last_avail_idx = idx;
1060 }
1061 
1062 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1063 {
1064     return vdev->vq + n;
1065 }
1066 
1067 uint16_t virtio_get_queue_index(VirtQueue *vq)
1068 {
1069     return vq->queue_index;
1070 }
1071 
1072 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1073 {
1074     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1075     if (event_notifier_test_and_clear(n)) {
1076         virtio_irq(vq);
1077     }
1078 }
1079 
1080 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1081                                                 bool with_irqfd)
1082 {
1083     if (assign && !with_irqfd) {
1084         event_notifier_set_handler(&vq->guest_notifier,
1085                                    virtio_queue_guest_notifier_read);
1086     } else {
1087         event_notifier_set_handler(&vq->guest_notifier, NULL);
1088     }
1089     if (!assign) {
1090         /* Test and clear notifier before closing it,
1091          * in case poll callback didn't have time to run. */
1092         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1093     }
1094 }
1095 
1096 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1097 {
1098     return &vq->guest_notifier;
1099 }
1100 
1101 static void virtio_queue_host_notifier_read(EventNotifier *n)
1102 {
1103     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1104     if (event_notifier_test_and_clear(n)) {
1105         virtio_queue_notify_vq(vq);
1106     }
1107 }
1108 
1109 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1110                                                bool set_handler)
1111 {
1112     if (assign && set_handler) {
1113         event_notifier_set_handler(&vq->host_notifier,
1114                                    virtio_queue_host_notifier_read);
1115     } else {
1116         event_notifier_set_handler(&vq->host_notifier, NULL);
1117     }
1118     if (!assign) {
1119         /* Test and clear notifier before after disabling event,
1120          * in case poll callback didn't have time to run. */
1121         virtio_queue_host_notifier_read(&vq->host_notifier);
1122     }
1123 }
1124 
1125 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1126 {
1127     return &vq->host_notifier;
1128 }
1129 
1130 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1131 {
1132     if (vdev->bus_name) {
1133         g_free(vdev->bus_name);
1134         vdev->bus_name = NULL;
1135     }
1136 
1137     if (bus_name) {
1138         vdev->bus_name = g_strdup(bus_name);
1139     }
1140 }
1141 
1142 static int virtio_device_init(DeviceState *qdev)
1143 {
1144     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1145     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
1146     assert(k->init != NULL);
1147     if (k->init(vdev) < 0) {
1148         return -1;
1149     }
1150     virtio_bus_plug_device(vdev);
1151     return 0;
1152 }
1153 
1154 static int virtio_device_exit(DeviceState *qdev)
1155 {
1156     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1157 
1158     if (vdev->bus_name) {
1159         g_free(vdev->bus_name);
1160         vdev->bus_name = NULL;
1161     }
1162     return 0;
1163 }
1164 
1165 static void virtio_device_class_init(ObjectClass *klass, void *data)
1166 {
1167     /* Set the default value here. */
1168     DeviceClass *dc = DEVICE_CLASS(klass);
1169     dc->init = virtio_device_init;
1170     dc->exit = virtio_device_exit;
1171     dc->bus_type = TYPE_VIRTIO_BUS;
1172 }
1173 
1174 static const TypeInfo virtio_device_info = {
1175     .name = TYPE_VIRTIO_DEVICE,
1176     .parent = TYPE_DEVICE,
1177     .instance_size = sizeof(VirtIODevice),
1178     .class_init = virtio_device_class_init,
1179     .abstract = true,
1180     .class_size = sizeof(VirtioDeviceClass),
1181 };
1182 
1183 static void virtio_register_types(void)
1184 {
1185     type_register_static(&virtio_device_info);
1186 }
1187 
1188 type_init(virtio_register_types)
1189