xref: /openbmc/qemu/hw/virtio/virtio.c (revision cd4eb4c5)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "hw/virtio/virtio.h"
19 #include "qemu/atomic.h"
20 #include "hw/virtio/virtio-bus.h"
21 
/*
 * Default alignment, in bytes, between the consumer and producer parts of
 * the vring (4096, the x86 page size).  It is used by transports such as
 * PCI which don't provide a means for the guest to tell the host the
 * alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
28 
/* One descriptor table entry, as laid out in guest memory. */
typedef struct VRingDesc {
    uint64_t addr;      /* guest-physical address of the buffer */
    uint32_t len;       /* length of the buffer in bytes */
    uint16_t flags;     /* VRING_DESC_F_* bits */
    uint16_t next;      /* index of the next descriptor when chained */
} VRingDesc;
36 
/*
 * Header of the available ring as laid out in guest memory.  The ring
 * entries follow the header; the structure is only used for offsetof()
 * computations, so the trailing array is a C99 flexible array member
 * rather than a GNU zero-length array.
 */
typedef struct VRingAvail
{
    uint16_t flags;     /* VRING_AVAIL_F_* bits */
    uint16_t idx;       /* index the guest will fill next */
    uint16_t ring[];    /* descriptor head indices */
} VRingAvail;
43 
/* One entry of the used ring, as laid out in guest memory. */
typedef struct VRingUsedElem {
    uint32_t id;        /* head index of the completed descriptor chain */
    uint32_t len;       /* length value reported for that chain */
} VRingUsedElem;
49 
50 typedef struct VRingUsed
51 {
52     uint16_t flags;
53     uint16_t idx;
54     VRingUsedElem ring[0];
55 } VRingUsed;
56 
57 typedef struct VRing
58 {
59     unsigned int num;
60     unsigned int align;
61     hwaddr desc;
62     hwaddr avail;
63     hwaddr used;
64 } VRing;
65 
/* Per-queue state kept by the host for one virtqueue of a device. */
struct VirtQueue
{
    /* Ring geometry and the derived desc/avail/used addresses */
    VRing vring;
    /* Base address of the ring as programmed via virtio_queue_set_addr() */
    hwaddr pa;
    /* Next available-ring index the host will consume */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used currently holds a usable value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    /* Position of this queue in the device's vq array */
    uint16_t queue_index;

    /* Elements popped from the queue and not yet flushed back */
    int inuse;

    /* Vector passed to the transport when notifying for this queue */
    uint16_t vector;
    /* Device callback run when the queue is notified */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    /* Owning device */
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
90 
91 /* virt queue functions */
92 static void virtqueue_init(VirtQueue *vq)
93 {
94     hwaddr pa = vq->pa;
95 
96     vq->vring.desc = pa;
97     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
98     vq->vring.used = vring_align(vq->vring.avail +
99                                  offsetof(VRingAvail, ring[vq->vring.num]),
100                                  vq->vring.align);
101 }
102 
103 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
104 {
105     hwaddr pa;
106     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
107     return ldq_phys(pa);
108 }
109 
110 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
111 {
112     hwaddr pa;
113     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
114     return ldl_phys(pa);
115 }
116 
117 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
118 {
119     hwaddr pa;
120     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
121     return lduw_phys(pa);
122 }
123 
124 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
125 {
126     hwaddr pa;
127     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
128     return lduw_phys(pa);
129 }
130 
131 static inline uint16_t vring_avail_flags(VirtQueue *vq)
132 {
133     hwaddr pa;
134     pa = vq->vring.avail + offsetof(VRingAvail, flags);
135     return lduw_phys(pa);
136 }
137 
138 static inline uint16_t vring_avail_idx(VirtQueue *vq)
139 {
140     hwaddr pa;
141     pa = vq->vring.avail + offsetof(VRingAvail, idx);
142     return lduw_phys(pa);
143 }
144 
145 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
146 {
147     hwaddr pa;
148     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
149     return lduw_phys(pa);
150 }
151 
152 static inline uint16_t vring_used_event(VirtQueue *vq)
153 {
154     return vring_avail_ring(vq, vq->vring.num);
155 }
156 
157 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
158 {
159     hwaddr pa;
160     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
161     stl_phys(pa, val);
162 }
163 
164 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
165 {
166     hwaddr pa;
167     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
168     stl_phys(pa, val);
169 }
170 
171 static uint16_t vring_used_idx(VirtQueue *vq)
172 {
173     hwaddr pa;
174     pa = vq->vring.used + offsetof(VRingUsed, idx);
175     return lduw_phys(pa);
176 }
177 
178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
179 {
180     hwaddr pa;
181     pa = vq->vring.used + offsetof(VRingUsed, idx);
182     stw_phys(pa, val);
183 }
184 
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
186 {
187     hwaddr pa;
188     pa = vq->vring.used + offsetof(VRingUsed, flags);
189     stw_phys(pa, lduw_phys(pa) | mask);
190 }
191 
192 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
193 {
194     hwaddr pa;
195     pa = vq->vring.used + offsetof(VRingUsed, flags);
196     stw_phys(pa, lduw_phys(pa) & ~mask);
197 }
198 
199 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
200 {
201     hwaddr pa;
202     if (!vq->notification) {
203         return;
204     }
205     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
206     stw_phys(pa, val);
207 }
208 
209 void virtio_queue_set_notification(VirtQueue *vq, int enable)
210 {
211     vq->notification = enable;
212     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
213         vring_avail_event(vq, vring_avail_idx(vq));
214     } else if (enable) {
215         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
216     } else {
217         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
218     }
219     if (enable) {
220         /* Expose avail event/used flags before caller checks the avail idx. */
221         smp_mb();
222     }
223 }
224 
225 int virtio_queue_ready(VirtQueue *vq)
226 {
227     return vq->vring.avail != 0;
228 }
229 
230 int virtio_queue_empty(VirtQueue *vq)
231 {
232     return vring_avail_idx(vq) == vq->last_avail_idx;
233 }
234 
235 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
236                     unsigned int len, unsigned int idx)
237 {
238     unsigned int offset;
239     int i;
240 
241     trace_virtqueue_fill(vq, elem, len, idx);
242 
243     offset = 0;
244     for (i = 0; i < elem->in_num; i++) {
245         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
246 
247         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
248                                   elem->in_sg[i].iov_len,
249                                   1, size);
250 
251         offset += size;
252     }
253 
254     for (i = 0; i < elem->out_num; i++)
255         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
256                                   elem->out_sg[i].iov_len,
257                                   0, elem->out_sg[i].iov_len);
258 
259     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
260 
261     /* Get a pointer to the next entry in the used ring. */
262     vring_used_ring_id(vq, idx, elem->index);
263     vring_used_ring_len(vq, idx, len);
264 }
265 
266 void virtqueue_flush(VirtQueue *vq, unsigned int count)
267 {
268     uint16_t old, new;
269     /* Make sure buffer is written before we update index. */
270     smp_wmb();
271     trace_virtqueue_flush(vq, count);
272     old = vring_used_idx(vq);
273     new = old + count;
274     vring_used_idx_set(vq, new);
275     vq->inuse -= count;
276     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
277         vq->signalled_used_valid = false;
278 }
279 
280 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
281                     unsigned int len)
282 {
283     virtqueue_fill(vq, elem, len, 0);
284     virtqueue_flush(vq, 1);
285 }
286 
287 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
288 {
289     uint16_t num_heads = vring_avail_idx(vq) - idx;
290 
291     /* Check it isn't doing very strange things with descriptor numbers. */
292     if (num_heads > vq->vring.num) {
293         error_report("Guest moved used index from %u to %u",
294                      idx, vring_avail_idx(vq));
295         exit(1);
296     }
297     /* On success, callers read a descriptor at vq->last_avail_idx.
298      * Make sure descriptor read does not bypass avail index read. */
299     if (num_heads) {
300         smp_rmb();
301     }
302 
303     return num_heads;
304 }
305 
306 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
307 {
308     unsigned int head;
309 
310     /* Grab the next descriptor number they're advertising, and increment
311      * the index we've seen. */
312     head = vring_avail_ring(vq, idx % vq->vring.num);
313 
314     /* If their number is silly, that's a fatal mistake. */
315     if (head >= vq->vring.num) {
316         error_report("Guest says index %u is available", head);
317         exit(1);
318     }
319 
320     return head;
321 }
322 
323 static unsigned virtqueue_next_desc(hwaddr desc_pa,
324                                     unsigned int i, unsigned int max)
325 {
326     unsigned int next;
327 
328     /* If this descriptor says it doesn't chain, we're done. */
329     if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
330         return max;
331 
332     /* Check they're not leading us off end of descriptors. */
333     next = vring_desc_next(desc_pa, i);
334     /* Make sure compiler knows to grab that: we don't want it changing! */
335     smp_wmb();
336 
337     if (next >= max) {
338         error_report("Desc next is %u", next);
339         exit(1);
340     }
341 
342     return next;
343 }
344 
/*
 * Sum the buffer bytes currently offered by the guest, without consuming
 * anything: the walk uses a local copy of last_avail_idx.
 *
 * @in_bytes:      if non-NULL, receives the total length of descriptors
 *                 flagged VRING_DESC_F_WRITE
 * @out_bytes:     if non-NULL, receives the total length of the others
 * @max_in_bytes:  } the walk stops as soon as both totals have reached
 * @max_out_bytes: } these thresholds
 *
 * Malformed rings (bad indirect table size, descriptor loops) are fatal.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    /* One iteration per available descriptor chain. */
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        /* Direct chains keep counting from the buffers seen so far. */
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            /* The table must hold a whole number of descriptors. */
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            /* Counting restarts at entry 0 of the indirect table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            /* Both thresholds reached: no need to look any further. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        /* An indirect chain counts as a single buffer of the main table. */
        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
414 
415 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
416                           unsigned int out_bytes)
417 {
418     unsigned int in_total, out_total;
419 
420     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
421     return in_bytes <= in_total && out_bytes <= out_total;
422 }
423 
424 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
425     size_t num_sg, int is_write)
426 {
427     unsigned int i;
428     hwaddr len;
429 
430     for (i = 0; i < num_sg; i++) {
431         len = sg[i].iov_len;
432         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
433         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
434             error_report("virtio: trying to map MMIO memory");
435             exit(1);
436         }
437     }
438 }
439 
/*
 * Take the next available descriptor chain off the queue.
 *
 * The chain's descriptors are collected into elem (VRING_DESC_F_WRITE ones
 * as in_sg/in_addr, the rest as out_sg/out_addr), their buffers are mapped,
 * and elem->index is set to the chain's head.  last_avail_idx and the
 * in-use count are advanced.
 *
 * Returns 0 when nothing is available, otherwise the number of descriptors
 * collected.  Malformed chains are fatal.
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Consume one available-ring slot. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        /* The table must hold a whole number of descriptors. */
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            /* Refuse to run past the end of the element's in arrays. */
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            /* Refuse to run past the end of the element's out arrays. */
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is recorded here; iov_base is set when mapping. */
        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
510 
511 /* virtio device */
512 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
513 {
514     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
515     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
516 
517     if (k->notify) {
518         k->notify(qbus->parent, vector);
519     }
520 }
521 
522 void virtio_update_irq(VirtIODevice *vdev)
523 {
524     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
525 }
526 
527 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
528 {
529     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
530     trace_virtio_set_status(vdev, val);
531 
532     if (k->set_status) {
533         k->set_status(vdev, val);
534     }
535     vdev->status = val;
536 }
537 
538 void virtio_reset(void *opaque)
539 {
540     VirtIODevice *vdev = opaque;
541     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
542     int i;
543 
544     virtio_set_status(vdev, 0);
545 
546     if (k->reset) {
547         k->reset(vdev);
548     }
549 
550     vdev->guest_features = 0;
551     vdev->queue_sel = 0;
552     vdev->status = 0;
553     vdev->isr = 0;
554     vdev->config_vector = VIRTIO_NO_VECTOR;
555     virtio_notify_vector(vdev, vdev->config_vector);
556 
557     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
558         vdev->vq[i].vring.desc = 0;
559         vdev->vq[i].vring.avail = 0;
560         vdev->vq[i].vring.used = 0;
561         vdev->vq[i].last_avail_idx = 0;
562         vdev->vq[i].pa = 0;
563         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
564         vdev->vq[i].signalled_used = 0;
565         vdev->vq[i].signalled_used_valid = false;
566         vdev->vq[i].notification = true;
567     }
568 }
569 
570 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
571 {
572     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
573     uint8_t val;
574 
575     if (addr + sizeof(val) > vdev->config_len) {
576         return (uint32_t)-1;
577     }
578 
579     k->get_config(vdev, vdev->config);
580 
581     val = ldub_p(vdev->config + addr);
582     return val;
583 }
584 
585 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
586 {
587     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
588     uint16_t val;
589 
590     if (addr + sizeof(val) > vdev->config_len) {
591         return (uint32_t)-1;
592     }
593 
594     k->get_config(vdev, vdev->config);
595 
596     val = lduw_p(vdev->config + addr);
597     return val;
598 }
599 
600 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
601 {
602     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
603     uint32_t val;
604 
605     if (addr + sizeof(val) > vdev->config_len) {
606         return (uint32_t)-1;
607     }
608 
609     k->get_config(vdev, vdev->config);
610 
611     val = ldl_p(vdev->config + addr);
612     return val;
613 }
614 
615 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
616 {
617     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618     uint8_t val = data;
619 
620     if (addr + sizeof(val) > vdev->config_len) {
621         return;
622     }
623 
624     stb_p(vdev->config + addr, val);
625 
626     if (k->set_config) {
627         k->set_config(vdev, vdev->config);
628     }
629 }
630 
631 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
632 {
633     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
634     uint16_t val = data;
635 
636     if (addr + sizeof(val) > vdev->config_len) {
637         return;
638     }
639 
640     stw_p(vdev->config + addr, val);
641 
642     if (k->set_config) {
643         k->set_config(vdev, vdev->config);
644     }
645 }
646 
647 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
648 {
649     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
650     uint32_t val = data;
651 
652     if (addr + sizeof(val) > vdev->config_len) {
653         return;
654     }
655 
656     stl_p(vdev->config + addr, val);
657 
658     if (k->set_config) {
659         k->set_config(vdev, vdev->config);
660     }
661 }
662 
663 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
664 {
665     vdev->vq[n].pa = addr;
666     virtqueue_init(&vdev->vq[n]);
667 }
668 
669 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
670 {
671     return vdev->vq[n].pa;
672 }
673 
674 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
675 {
676     /* Don't allow guest to flip queue between existent and
677      * nonexistent states, or to set it to an invalid size.
678      */
679     if (!!num != !!vdev->vq[n].vring.num ||
680         num > VIRTQUEUE_MAX_SIZE ||
681         num < 0) {
682         return;
683     }
684     vdev->vq[n].vring.num = num;
685     virtqueue_init(&vdev->vq[n]);
686 }
687 
688 int virtio_queue_get_num(VirtIODevice *vdev, int n)
689 {
690     return vdev->vq[n].vring.num;
691 }
692 
693 int virtio_queue_get_id(VirtQueue *vq)
694 {
695     VirtIODevice *vdev = vq->vdev;
696     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
697     return vq - &vdev->vq[0];
698 }
699 
700 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
701 {
702     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
703     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
704 
705     /* Check that the transport told us it was going to do this
706      * (so a buggy transport will immediately assert rather than
707      * silently failing to migrate this state)
708      */
709     assert(k->has_variable_vring_alignment);
710 
711     vdev->vq[n].vring.align = align;
712     virtqueue_init(&vdev->vq[n]);
713 }
714 
715 void virtio_queue_notify_vq(VirtQueue *vq)
716 {
717     if (vq->vring.desc) {
718         VirtIODevice *vdev = vq->vdev;
719         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
720         vq->handle_output(vdev, vq);
721     }
722 }
723 
724 void virtio_queue_notify(VirtIODevice *vdev, int n)
725 {
726     virtio_queue_notify_vq(&vdev->vq[n]);
727 }
728 
729 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
730 {
731     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
732         VIRTIO_NO_VECTOR;
733 }
734 
735 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
736 {
737     if (n < VIRTIO_PCI_QUEUE_MAX)
738         vdev->vq[n].vector = vector;
739 }
740 
741 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
742                             void (*handle_output)(VirtIODevice *, VirtQueue *))
743 {
744     int i;
745 
746     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
747         if (vdev->vq[i].vring.num == 0)
748             break;
749     }
750 
751     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
752         abort();
753 
754     vdev->vq[i].vring.num = queue_size;
755     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
756     vdev->vq[i].handle_output = handle_output;
757 
758     return &vdev->vq[i];
759 }
760 
761 void virtio_del_queue(VirtIODevice *vdev, int n)
762 {
763     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
764         abort();
765     }
766 
767     vdev->vq[n].vring.num = 0;
768 }
769 
770 void virtio_irq(VirtQueue *vq)
771 {
772     trace_virtio_irq(vq);
773     vq->vdev->isr |= 0x01;
774     virtio_notify_vector(vq->vdev, vq->vector);
775 }
776 
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new,
 * should we trigger an event?
 *
 * All arithmetic is modulo 2^16, so the test keeps working when the
 * indices wrap around. */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    uint16_t past_event = (uint16_t)(new - event - 1);
    uint16_t advanced = (uint16_t)(new - old);

    return past_event < advanced;
}
789 
790 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
791 {
792     uint16_t old, new;
793     bool v;
794     /* We need to expose used array entries before checking used event. */
795     smp_mb();
796     /* Always notify when queue is empty (when feature acknowledge) */
797     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
798          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
799         return true;
800     }
801 
802     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
803         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
804     }
805 
806     v = vq->signalled_used_valid;
807     vq->signalled_used_valid = true;
808     old = vq->signalled_used;
809     new = vq->signalled_used = vring_used_idx(vq);
810     return !v || vring_need_event(vring_used_event(vq), new, old);
811 }
812 
813 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
814 {
815     if (!vring_notify(vdev, vq)) {
816         return;
817     }
818 
819     trace_virtio_notify(vdev, vq);
820     vdev->isr |= 0x01;
821     virtio_notify_vector(vdev, vq->vector);
822 }
823 
824 void virtio_notify_config(VirtIODevice *vdev)
825 {
826     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
827         return;
828 
829     vdev->isr |= 0x03;
830     virtio_notify_vector(vdev, vdev->config_vector);
831 }
832 
833 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
834 {
835     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
836     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
837     int i;
838 
839     if (k->save_config) {
840         k->save_config(qbus->parent, f);
841     }
842 
843     qemu_put_8s(f, &vdev->status);
844     qemu_put_8s(f, &vdev->isr);
845     qemu_put_be16s(f, &vdev->queue_sel);
846     qemu_put_be32s(f, &vdev->guest_features);
847     qemu_put_be32(f, vdev->config_len);
848     qemu_put_buffer(f, vdev->config, vdev->config_len);
849 
850     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
851         if (vdev->vq[i].vring.num == 0)
852             break;
853     }
854 
855     qemu_put_be32(f, i);
856 
857     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
858         if (vdev->vq[i].vring.num == 0)
859             break;
860 
861         qemu_put_be32(f, vdev->vq[i].vring.num);
862         if (k->has_variable_vring_alignment) {
863             qemu_put_be32(f, vdev->vq[i].vring.align);
864         }
865         qemu_put_be64(f, vdev->vq[i].pa);
866         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
867         if (k->save_queue) {
868             k->save_queue(qbus->parent, i, f);
869         }
870     }
871 }
872 
873 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
874 {
875     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
876     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
877     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
878     uint32_t supported_features = vbusk->get_features(qbus->parent);
879     bool bad = (val & ~supported_features) != 0;
880 
881     val &= supported_features;
882     if (k->set_features) {
883         k->set_features(vdev, val);
884     }
885     vdev->guest_features = val;
886     return bad ? -1 : 0;
887 }
888 
889 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
890 {
891     int num, i, ret;
892     uint32_t features;
893     uint32_t supported_features;
894     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
895     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
896 
897     if (k->load_config) {
898         ret = k->load_config(qbus->parent, f);
899         if (ret)
900             return ret;
901     }
902 
903     qemu_get_8s(f, &vdev->status);
904     qemu_get_8s(f, &vdev->isr);
905     qemu_get_be16s(f, &vdev->queue_sel);
906     qemu_get_be32s(f, &features);
907 
908     if (virtio_set_features(vdev, features) < 0) {
909         supported_features = k->get_features(qbus->parent);
910         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
911                      features, supported_features);
912         return -1;
913     }
914     vdev->config_len = qemu_get_be32(f);
915     qemu_get_buffer(f, vdev->config, vdev->config_len);
916 
917     num = qemu_get_be32(f);
918 
919     for (i = 0; i < num; i++) {
920         vdev->vq[i].vring.num = qemu_get_be32(f);
921         if (k->has_variable_vring_alignment) {
922             vdev->vq[i].vring.align = qemu_get_be32(f);
923         }
924         vdev->vq[i].pa = qemu_get_be64(f);
925         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
926         vdev->vq[i].signalled_used_valid = false;
927         vdev->vq[i].notification = true;
928 
929         if (vdev->vq[i].pa) {
930             uint16_t nheads;
931             virtqueue_init(&vdev->vq[i]);
932             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
933             /* Check it isn't doing very strange things with descriptor numbers. */
934             if (nheads > vdev->vq[i].vring.num) {
935                 error_report("VQ %d size 0x%x Guest index 0x%x "
936                              "inconsistent with Host index 0x%x: delta 0x%x",
937                              i, vdev->vq[i].vring.num,
938                              vring_avail_idx(&vdev->vq[i]),
939                              vdev->vq[i].last_avail_idx, nheads);
940                 return -1;
941             }
942         } else if (vdev->vq[i].last_avail_idx) {
943             error_report("VQ %d address 0x0 "
944                          "inconsistent with Host index 0x%x",
945                          i, vdev->vq[i].last_avail_idx);
946                 return -1;
947 	}
948         if (k->load_queue) {
949             ret = k->load_queue(qbus->parent, i, f);
950             if (ret)
951                 return ret;
952         }
953     }
954 
955     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
956     return 0;
957 }
958 
959 void virtio_cleanup(VirtIODevice *vdev)
960 {
961     qemu_del_vm_change_state_handler(vdev->vmstate);
962     g_free(vdev->config);
963     g_free(vdev->vq);
964 }
965 
966 static void virtio_vmstate_change(void *opaque, int running, RunState state)
967 {
968     VirtIODevice *vdev = opaque;
969     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
970     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
971     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
972     vdev->vm_running = running;
973 
974     if (backend_run) {
975         virtio_set_status(vdev, vdev->status);
976     }
977 
978     if (k->vmstate_change) {
979         k->vmstate_change(qbus->parent, backend_run);
980     }
981 
982     if (!backend_run) {
983         virtio_set_status(vdev, vdev->status);
984     }
985 }
986 
987 void virtio_init(VirtIODevice *vdev, const char *name,
988                  uint16_t device_id, size_t config_size)
989 {
990     int i;
991     vdev->device_id = device_id;
992     vdev->status = 0;
993     vdev->isr = 0;
994     vdev->queue_sel = 0;
995     vdev->config_vector = VIRTIO_NO_VECTOR;
996     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
997     vdev->vm_running = runstate_is_running();
998     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
999         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1000         vdev->vq[i].vdev = vdev;
1001         vdev->vq[i].queue_index = i;
1002     }
1003 
1004     vdev->name = name;
1005     vdev->config_len = config_size;
1006     if (vdev->config_len) {
1007         vdev->config = g_malloc0(config_size);
1008     } else {
1009         vdev->config = NULL;
1010     }
1011     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1012                                                      vdev);
1013 }
1014 
1015 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1016 {
1017     return vdev->vq[n].vring.desc;
1018 }
1019 
1020 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1021 {
1022     return vdev->vq[n].vring.avail;
1023 }
1024 
1025 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1026 {
1027     return vdev->vq[n].vring.used;
1028 }
1029 
1030 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1031 {
1032     return vdev->vq[n].vring.desc;
1033 }
1034 
1035 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1036 {
1037     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1038 }
1039 
1040 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1041 {
1042     return offsetof(VRingAvail, ring) +
1043         sizeof(uint64_t) * vdev->vq[n].vring.num;
1044 }
1045 
1046 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1047 {
1048     return offsetof(VRingUsed, ring) +
1049         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1050 }
1051 
1052 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1053 {
1054     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1055 	    virtio_queue_get_used_size(vdev, n);
1056 }
1057 
1058 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1059 {
1060     return vdev->vq[n].last_avail_idx;
1061 }
1062 
1063 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1064 {
1065     vdev->vq[n].last_avail_idx = idx;
1066 }
1067 
1068 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1069 {
1070     vdev->vq[n].signalled_used_valid = false;
1071 }
1072 
1073 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1074 {
1075     return vdev->vq + n;
1076 }
1077 
1078 uint16_t virtio_get_queue_index(VirtQueue *vq)
1079 {
1080     return vq->queue_index;
1081 }
1082 
1083 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1084 {
1085     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1086     if (event_notifier_test_and_clear(n)) {
1087         virtio_irq(vq);
1088     }
1089 }
1090 
1091 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1092                                                 bool with_irqfd)
1093 {
1094     if (assign && !with_irqfd) {
1095         event_notifier_set_handler(&vq->guest_notifier,
1096                                    virtio_queue_guest_notifier_read);
1097     } else {
1098         event_notifier_set_handler(&vq->guest_notifier, NULL);
1099     }
1100     if (!assign) {
1101         /* Test and clear notifier before closing it,
1102          * in case poll callback didn't have time to run. */
1103         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1104     }
1105 }
1106 
1107 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1108 {
1109     return &vq->guest_notifier;
1110 }
1111 
1112 static void virtio_queue_host_notifier_read(EventNotifier *n)
1113 {
1114     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1115     if (event_notifier_test_and_clear(n)) {
1116         virtio_queue_notify_vq(vq);
1117     }
1118 }
1119 
1120 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1121                                                bool set_handler)
1122 {
1123     if (assign && set_handler) {
1124         event_notifier_set_handler(&vq->host_notifier,
1125                                    virtio_queue_host_notifier_read);
1126     } else {
1127         event_notifier_set_handler(&vq->host_notifier, NULL);
1128     }
1129     if (!assign) {
1130         /* Test and clear notifier before after disabling event,
1131          * in case poll callback didn't have time to run. */
1132         virtio_queue_host_notifier_read(&vq->host_notifier);
1133     }
1134 }
1135 
1136 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1137 {
1138     return &vq->host_notifier;
1139 }
1140 
1141 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1142 {
1143     if (vdev->bus_name) {
1144         g_free(vdev->bus_name);
1145         vdev->bus_name = NULL;
1146     }
1147 
1148     if (bus_name) {
1149         vdev->bus_name = g_strdup(bus_name);
1150     }
1151 }
1152 
1153 static void virtio_device_realize(DeviceState *dev, Error **errp)
1154 {
1155     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1156     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1157     Error *err = NULL;
1158 
1159     if (vdc->realize != NULL) {
1160         vdc->realize(dev, &err);
1161         if (err != NULL) {
1162             error_propagate(errp, err);
1163             return;
1164         }
1165     }
1166     virtio_bus_device_plugged(vdev);
1167 }
1168 
1169 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1170 {
1171     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1172     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1173     Error *err = NULL;
1174 
1175     virtio_bus_device_unplugged(vdev);
1176 
1177     if (vdc->unrealize != NULL) {
1178         vdc->unrealize(dev, &err);
1179         if (err != NULL) {
1180             error_propagate(errp, err);
1181             return;
1182         }
1183     }
1184 
1185     if (vdev->bus_name) {
1186         g_free(vdev->bus_name);
1187         vdev->bus_name = NULL;
1188     }
1189 }
1190 
1191 static void virtio_device_class_init(ObjectClass *klass, void *data)
1192 {
1193     /* Set the default value here. */
1194     DeviceClass *dc = DEVICE_CLASS(klass);
1195 
1196     dc->realize = virtio_device_realize;
1197     dc->unrealize = virtio_device_unrealize;
1198     dc->bus_type = TYPE_VIRTIO_BUS;
1199 }
1200 
1201 static const TypeInfo virtio_device_info = {
1202     .name = TYPE_VIRTIO_DEVICE,
1203     .parent = TYPE_DEVICE,
1204     .instance_size = sizeof(VirtIODevice),
1205     .class_init = virtio_device_class_init,
1206     .abstract = true,
1207     .class_size = sizeof(VirtioDeviceClass),
1208 };
1209 
1210 static void virtio_register_types(void)
1211 {
1212     type_register_static(&virtio_device_info);
1213 }
1214 
1215 type_init(virtio_register_types)
1216