xref: /openbmc/qemu/hw/virtio/virtio.c (revision 181103cd)
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether the signalled_used value above is still valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

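    /* The three parts of the vring live back to back in guest memory: the
     * descriptor table at pa, the avail ring right after it, and the used
     * ring aligned up to VIRTIO_PCI_VRING_ALIGN past the avail ring. */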
    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}

static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

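/* avail_event lives right after the last used ring entry and tells the guest
 * up to which avail index the device has looked; it is only meaningful when
 * VIRTIO_RING_F_EVENT_IDX has been negotiated. */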
static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

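/* Enable or disable guest->host notifications for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX the decision is driven by avail_event, so we keep
 * (or stop keeping) it up to date; otherwise the legacy
 * VRING_USED_F_NO_NOTIFY flag in the used ring is toggled. */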
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

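/* virtqueue_fill() unmaps an element's buffers and writes its used ring entry
 * 'idx' slots past the current used index, but does not yet make it visible
 * to the guest; virtqueue_flush() later publishes 'count' filled entries by
 * advancing the used index. */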
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

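/* Walk (without consuming) the descriptor chains the guest has made
 * available and add up how many bytes the device could write (in_bytes) and
 * read (out_bytes), stopping early once both caller-supplied limits are
 * reached. */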
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
    size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}

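/* Consume the next available descriptor chain: record the guest addresses,
 * map them into host memory and fill in 'elem'.  Returns the total number of
 * descriptors collected, or 0 if the ring is empty. */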
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output descriptors. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (vdev->binding->notify) {
        vdev->binding->notify(vdev->binding_opaque, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    k->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    k->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    k->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
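/* For example, with old = 4, new = 7 and event = 5: (uint16_t)(7 - 5 - 1) = 1
 * is less than (uint16_t)(7 - 4) = 3, so we notify, because the other side
 * asked to be woken once the index moved past 5.  The casts keep the
 * comparison correct across 16-bit index wrap-around. */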
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}

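/* Decide whether the guest actually needs an interrupt for this queue,
 * honouring either the event-index scheme or the legacy
 * VRING_AVAIL_F_NO_INTERRUPT flag. */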
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when the queue is empty, provided the guest has
     * acknowledged VIRTIO_F_NOTIFY_ON_EMPTY. */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    if (vdev->binding->save_config)
        vdev->binding->save_config(vdev->binding_opaque, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (vdev->binding->save_queue)
            vdev->binding->save_queue(vdev->binding_opaque, i, f);
    }
}

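/* Accept the feature bits written by the guest, masking out anything the
 * transport binding never offered.  Returns -1 if the guest requested
 * unknown bits, 0 otherwise. */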
int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t supported_features =
        vdev->binding->get_features(vdev->binding_opaque);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = vdev->binding->get_features(vdev->binding_opaque);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_common_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
}

void virtio_cleanup(VirtIODevice *vdev)
{
    virtio_common_cleanup(vdev);
    g_free(vdev);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

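    /* Push the status to the device class before the backend starts running,
     * but only after the backend has stopped when the VM pauses. */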
    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    int i;
    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
}

VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    vdev = g_malloc0(struct_size);
    virtio_init(vdev, name, device_id, config_size);
    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        DeviceState *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling the event,
         * in case the poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

static int virtio_device_init(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
    assert(k->init != NULL);
    if (k->init(vdev) < 0) {
        return -1;
    }
    virtio_bus_plug_device(vdev);
    return 0;
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);
    dc->init = virtio_device_init;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)