xref: /openbmc/qemu/hw/virtio/virtio.c (revision 6a1a8cc7)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "hw/virtio/virtio.h"
19 #include "qemu/atomic.h"
20 #include "hw/virtio/virtio-bus.h"
21 
22 /* The alignment to use between consumer and producer parts of vring.
23  * x86 pagesize again. */
24 #define VIRTIO_PCI_VRING_ALIGN         4096
25 
26 typedef struct VRingDesc
27 {
28     uint64_t addr;
29     uint32_t len;
30     uint16_t flags;
31     uint16_t next;
32 } VRingDesc;
33 
34 typedef struct VRingAvail
35 {
36     uint16_t flags;
37     uint16_t idx;
38     uint16_t ring[0];
39 } VRingAvail;
40 
41 typedef struct VRingUsedElem
42 {
43     uint32_t id;
44     uint32_t len;
45 } VRingUsedElem;
46 
47 typedef struct VRingUsed
48 {
49     uint16_t flags;
50     uint16_t idx;
51     VRingUsedElem ring[0];
52 } VRingUsed;
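/* Not declared above: when VIRTIO_RING_F_EVENT_IDX is negotiated, the legacy
 * ring layout places a 16-bit "used_event" field right after the avail ring
 * and a 16-bit "avail_event" field right after the used ring.  The accessors
 * vring_used_event() and vring_avail_event() below reach them by indexing
 * ring[vring.num], i.e. one slot past the declared arrays. */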
53 
54 typedef struct VRing
55 {
56     unsigned int num;
57     hwaddr desc;
58     hwaddr avail;
59     hwaddr used;
60 } VRing;
61 
62 struct VirtQueue
63 {
64     VRing vring;
65     hwaddr pa;
66     uint16_t last_avail_idx;
67     /* Last used index value we have signalled on */
68     uint16_t signalled_used;
69 
70     /* Whether the signalled_used value above is valid */
71     bool signalled_used_valid;
72 
73     /* Notification enabled? */
74     bool notification;
75 
76     uint16_t queue_index;
77 
78     int inuse;
79 
80     uint16_t vector;
81     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
82     VirtIODevice *vdev;
83     EventNotifier guest_notifier;
84     EventNotifier host_notifier;
85 };
86 
87 /* virt queue functions */
88 static void virtqueue_init(VirtQueue *vq)
89 {
90     hwaddr pa = vq->pa;
91 
92     vq->vring.desc = pa;
93     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
94     vq->vring.used = vring_align(vq->vring.avail +
95                                  offsetof(VRingAvail, ring[vq->vring.num]),
96                                  VIRTIO_PCI_VRING_ALIGN);
97 }
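/* Worked example (illustrative numbers only, not taken from this file):
 * assuming a queue with num = 256 and a page-aligned guest address pa,
 * virtqueue_init() above yields
 *
 *     desc  = pa                                   (256 * 16 = 4096 bytes)
 *     avail = pa + 4096                            (4 + 256 * 2 = 516 bytes)
 *     used  = align(pa + 4612, 4096) = pa + 8192   (4 + 256 * 8 bytes)
 *
 * so the used ring always starts on the next VIRTIO_PCI_VRING_ALIGN boundary
 * after the avail ring. */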
98 
99 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
100 {
101     hwaddr pa;
102     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
103     return ldq_phys(pa);
104 }
105 
106 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
107 {
108     hwaddr pa;
109     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
110     return ldl_phys(pa);
111 }
112 
113 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
114 {
115     hwaddr pa;
116     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
117     return lduw_phys(pa);
118 }
119 
120 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
121 {
122     hwaddr pa;
123     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
124     return lduw_phys(pa);
125 }
126 
127 static inline uint16_t vring_avail_flags(VirtQueue *vq)
128 {
129     hwaddr pa;
130     pa = vq->vring.avail + offsetof(VRingAvail, flags);
131     return lduw_phys(pa);
132 }
133 
134 static inline uint16_t vring_avail_idx(VirtQueue *vq)
135 {
136     hwaddr pa;
137     pa = vq->vring.avail + offsetof(VRingAvail, idx);
138     return lduw_phys(pa);
139 }
140 
141 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
142 {
143     hwaddr pa;
144     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
145     return lduw_phys(pa);
146 }
147 
148 static inline uint16_t vring_used_event(VirtQueue *vq)
149 {
150     return vring_avail_ring(vq, vq->vring.num);
151 }
152 
153 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
154 {
155     hwaddr pa;
156     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
157     stl_phys(pa, val);
158 }
159 
160 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
161 {
162     hwaddr pa;
163     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
164     stl_phys(pa, val);
165 }
166 
167 static uint16_t vring_used_idx(VirtQueue *vq)
168 {
169     hwaddr pa;
170     pa = vq->vring.used + offsetof(VRingUsed, idx);
171     return lduw_phys(pa);
172 }
173 
174 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
175 {
176     hwaddr pa;
177     pa = vq->vring.used + offsetof(VRingUsed, idx);
178     stw_phys(pa, val);
179 }
180 
181 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
182 {
183     hwaddr pa;
184     pa = vq->vring.used + offsetof(VRingUsed, flags);
185     stw_phys(pa, lduw_phys(pa) | mask);
186 }
187 
188 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
189 {
190     hwaddr pa;
191     pa = vq->vring.used + offsetof(VRingUsed, flags);
192     stw_phys(pa, lduw_phys(pa) & ~mask);
193 }
194 
195 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
196 {
197     hwaddr pa;
198     if (!vq->notification) {
199         return;
200     }
201     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
202     stw_phys(pa, val);
203 }
204 
205 void virtio_queue_set_notification(VirtQueue *vq, int enable)
206 {
207     vq->notification = enable;
208     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
209         vring_avail_event(vq, vring_avail_idx(vq));
210     } else if (enable) {
211         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
212     } else {
213         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
214     }
215     if (enable) {
216         /* Expose avail event/used flags before caller checks the avail idx. */
217         smp_mb();
218     }
219 }
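/* Note on the two suppression mechanisms used above: with
 * VIRTIO_RING_F_EVENT_IDX the device never touches VRING_USED_F_NO_NOTIFY;
 * instead it stores the current avail index as the "avail event", which asks
 * the guest to kick only once it has added buffers past that point.  Without
 * the feature, the NO_NOTIFY flag is the only (purely advisory) hint. */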
220 
221 int virtio_queue_ready(VirtQueue *vq)
222 {
223     return vq->vring.avail != 0;
224 }
225 
226 int virtio_queue_empty(VirtQueue *vq)
227 {
228     return vring_avail_idx(vq) == vq->last_avail_idx;
229 }
230 
231 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
232                     unsigned int len, unsigned int idx)
233 {
234     unsigned int offset;
235     int i;
236 
237     trace_virtqueue_fill(vq, elem, len, idx);
238 
239     offset = 0;
240     for (i = 0; i < elem->in_num; i++) {
241         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
242 
243         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
244                                   elem->in_sg[i].iov_len,
245                                   1, size);
246 
247         offset += size;
248     }
249 
250     for (i = 0; i < elem->out_num; i++)
251         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
252                                   elem->out_sg[i].iov_len,
253                                   0, elem->out_sg[i].iov_len);
254 
255     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
256 
257     /* Fill in the next slot in the used ring. */
258     vring_used_ring_id(vq, idx, elem->index);
259     vring_used_ring_len(vq, idx, len);
260 }
261 
262 void virtqueue_flush(VirtQueue *vq, unsigned int count)
263 {
264     uint16_t old, new;
265     /* Make sure buffer is written before we update index. */
266     smp_wmb();
267     trace_virtqueue_flush(vq, count);
268     old = vring_used_idx(vq);
269     new = old + count;
270     vring_used_idx_set(vq, new);
271     vq->inuse -= count;
272     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
273         vq->signalled_used_valid = false;
274 }
275 
276 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
277                     unsigned int len)
278 {
279     virtqueue_fill(vq, elem, len, 0);
280     virtqueue_flush(vq, 1);
281 }
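/* Typical device-model usage (a sketch, not code from this file; names such
 * as bytes_written, elem0/len0 are placeholders):
 *
 *     VirtQueueElement elem;
 *     while (virtqueue_pop(vq, &elem)) {
 *         ... read elem.out_sg, write results into elem.in_sg ...
 *         virtqueue_push(vq, &elem, bytes_written);
 *         virtio_notify(vdev, vq);
 *     }
 *
 * Batched completion fills consecutive used-ring slots and flushes once:
 *
 *     virtqueue_fill(vq, &elem0, len0, 0);
 *     virtqueue_fill(vq, &elem1, len1, 1);
 *     virtqueue_flush(vq, 2);
 *     virtio_notify(vdev, vq);
 */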
282 
283 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
284 {
285     uint16_t num_heads = vring_avail_idx(vq) - idx;
286 
287     /* Check it isn't doing very strange things with descriptor numbers. */
288     if (num_heads > vq->vring.num) {
289         error_report("Guest moved used index from %u to %u",
290                      idx, vring_avail_idx(vq));
291         exit(1);
292     }
293     /* On success, callers read a descriptor at vq->last_avail_idx.
294      * Make sure descriptor read does not bypass avail index read. */
295     if (num_heads) {
296         smp_rmb();
297     }
298 
299     return num_heads;
300 }
301 
302 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
303 {
304     unsigned int head;
305 
306     /* Grab the next descriptor number they're advertising; the caller
307      * advances the index it has seen. */
308     head = vring_avail_ring(vq, idx % vq->vring.num);
309 
310     /* If their number is silly, that's a fatal mistake. */
311     if (head >= vq->vring.num) {
312         error_report("Guest says index %u is available", head);
313         exit(1);
314     }
315 
316     return head;
317 }
318 
319 static unsigned virtqueue_next_desc(hwaddr desc_pa,
320                                     unsigned int i, unsigned int max)
321 {
322     unsigned int next;
323 
324     /* If this descriptor says it doesn't chain, we're done. */
325     if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
326         return max;
327 
328     /* Check they're not leading us off end of descriptors. */
329     next = vring_desc_next(desc_pa, i);
330     /* Make sure compiler knows to grab that: we don't want it changing! */
331     smp_wmb();
332 
333     if (next >= max) {
334         error_report("Desc next is %u", next);
335         exit(1);
336     }
337 
338     return next;
339 }
340 
341 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
342                                unsigned int *out_bytes,
343                                unsigned max_in_bytes, unsigned max_out_bytes)
344 {
345     unsigned int idx;
346     unsigned int total_bufs, in_total, out_total;
347 
348     idx = vq->last_avail_idx;
349 
350     total_bufs = in_total = out_total = 0;
351     while (virtqueue_num_heads(vq, idx)) {
352         unsigned int max, num_bufs, indirect = 0;
353         hwaddr desc_pa;
354         int i;
355 
356         max = vq->vring.num;
357         num_bufs = total_bufs;
358         i = virtqueue_get_head(vq, idx++);
359         desc_pa = vq->vring.desc;
360 
361         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
362             if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
363                 error_report("Invalid size for indirect buffer table");
364                 exit(1);
365             }
366 
367             /* If we've got too many, that implies a descriptor loop. */
368             if (num_bufs >= max) {
369                 error_report("Looped descriptor");
370                 exit(1);
371             }
372 
373             /* loop over the indirect descriptor table */
374             indirect = 1;
375             max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
376             num_bufs = i = 0;
377             desc_pa = vring_desc_addr(desc_pa, i);
378         }
379 
380         do {
381             /* If we've got too many, that implies a descriptor loop. */
382             if (++num_bufs > max) {
383                 error_report("Looped descriptor");
384                 exit(1);
385             }
386 
387             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
388                 in_total += vring_desc_len(desc_pa, i);
389             } else {
390                 out_total += vring_desc_len(desc_pa, i);
391             }
392             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
393                 goto done;
394             }
395         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
396 
397         if (!indirect)
398             total_bufs = num_bufs;
399         else
400             total_bufs++;
401     }
402 done:
403     if (in_bytes) {
404         *in_bytes = in_total;
405     }
406     if (out_bytes) {
407         *out_bytes = out_total;
408     }
409 }
410 
411 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
412                           unsigned int out_bytes)
413 {
414     unsigned int in_total, out_total;
415 
416     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
417     return in_bytes <= in_total && out_bytes <= out_total;
418 }
419 
420 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
421     size_t num_sg, int is_write)
422 {
423     unsigned int i;
424     hwaddr len;
425 
426     for (i = 0; i < num_sg; i++) {
427         len = sg[i].iov_len;
428         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
429         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
430             error_report("virtio: trying to map MMIO memory");
431             exit(1);
432         }
433     }
434 }
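/* Note: the mapping above requires each descriptor to be backed by guest RAM
 * that cpu_physical_memory_map() can map contiguously and in full; anything
 * else (e.g. a descriptor pointing at MMIO) is treated as a fatal guest
 * error. */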
435 
436 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
437 {
438     unsigned int i, head, max;
439     hwaddr desc_pa = vq->vring.desc;
440 
441     if (!virtqueue_num_heads(vq, vq->last_avail_idx))
442         return 0;
443 
444     /* When we start there are neither input nor output buffers. */
445     elem->out_num = elem->in_num = 0;
446 
447     max = vq->vring.num;
448 
449     i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
450     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
451         vring_avail_event(vq, vring_avail_idx(vq));
452     }
453 
454     if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
455         if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
456             error_report("Invalid size for indirect buffer table");
457             exit(1);
458         }
459 
460         /* loop over the indirect descriptor table */
461         max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
462         desc_pa = vring_desc_addr(desc_pa, i);
463         i = 0;
464     }
465 
466     /* Collect all the descriptors */
467     do {
468         struct iovec *sg;
469 
470         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
471             if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
472                 error_report("Too many write descriptors in indirect table");
473                 exit(1);
474             }
475             elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
476             sg = &elem->in_sg[elem->in_num++];
477         } else {
478             if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
479                 error_report("Too many read descriptors in indirect table");
480                 exit(1);
481             }
482             elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
483             sg = &elem->out_sg[elem->out_num++];
484         }
485 
486         sg->iov_len = vring_desc_len(desc_pa, i);
487 
488         /* If we've got too many, that implies a descriptor loop. */
489         if ((elem->in_num + elem->out_num) > max) {
490             error_report("Looped descriptor");
491             exit(1);
492         }
493     } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
494 
495     /* Now map what we have collected */
496     virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
497     virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
498 
499     elem->index = head;
500 
501     vq->inuse++;
502 
503     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
504     return elem->in_num + elem->out_num;
505 }
506 
507 /* virtio device */
508 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
509 {
510     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
511     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
512 
513     if (k->notify) {
514         k->notify(qbus->parent, vector);
515     }
516 }
517 
518 void virtio_update_irq(VirtIODevice *vdev)
519 {
520     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
521 }
522 
523 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
524 {
525     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
526     trace_virtio_set_status(vdev, val);
527 
528     if (k->set_status) {
529         k->set_status(vdev, val);
530     }
531     vdev->status = val;
532 }
533 
534 void virtio_reset(void *opaque)
535 {
536     VirtIODevice *vdev = opaque;
537     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
538     int i;
539 
540     virtio_set_status(vdev, 0);
541 
542     if (k->reset) {
543         k->reset(vdev);
544     }
545 
546     vdev->guest_features = 0;
547     vdev->queue_sel = 0;
548     vdev->status = 0;
549     vdev->isr = 0;
550     vdev->config_vector = VIRTIO_NO_VECTOR;
551     virtio_notify_vector(vdev, vdev->config_vector);
552 
553     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
554         vdev->vq[i].vring.desc = 0;
555         vdev->vq[i].vring.avail = 0;
556         vdev->vq[i].vring.used = 0;
557         vdev->vq[i].last_avail_idx = 0;
558         vdev->vq[i].pa = 0;
559         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
560         vdev->vq[i].signalled_used = 0;
561         vdev->vq[i].signalled_used_valid = false;
562         vdev->vq[i].notification = true;
563     }
564 }
565 
566 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
567 {
568     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
569     uint8_t val;
570 
571     k->get_config(vdev, vdev->config);
572 
573     if (addr > (vdev->config_len - sizeof(val)))
574         return (uint32_t)-1;
575 
576     val = ldub_p(vdev->config + addr);
577     return val;
578 }
579 
580 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
581 {
582     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
583     uint16_t val;
584 
585     k->get_config(vdev, vdev->config);
586 
587     if (addr > (vdev->config_len - sizeof(val)))
588         return (uint32_t)-1;
589 
590     val = lduw_p(vdev->config + addr);
591     return val;
592 }
593 
594 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
595 {
596     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
597     uint32_t val;
598 
599     k->get_config(vdev, vdev->config);
600 
601     if (addr > (vdev->config_len - sizeof(val)))
602         return (uint32_t)-1;
603 
604     val = ldl_p(vdev->config + addr);
605     return val;
606 }
607 
608 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
609 {
610     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
611     uint8_t val = data;
612 
613     if (addr > (vdev->config_len - sizeof(val)))
614         return;
615 
616     stb_p(vdev->config + addr, val);
617 
618     if (k->set_config) {
619         k->set_config(vdev, vdev->config);
620     }
621 }
622 
623 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
624 {
625     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
626     uint16_t val = data;
627 
628     if (addr > (vdev->config_len - sizeof(val)))
629         return;
630 
631     stw_p(vdev->config + addr, val);
632 
633     if (k->set_config) {
634         k->set_config(vdev, vdev->config);
635     }
636 }
637 
638 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
639 {
640     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
641     uint32_t val = data;
642 
643     if (addr > (vdev->config_len - sizeof(val)))
644         return;
645 
646     stl_p(vdev->config + addr, val);
647 
648     if (k->set_config) {
649         k->set_config(vdev, vdev->config);
650     }
651 }
652 
653 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
654 {
655     vdev->vq[n].pa = addr;
656     virtqueue_init(&vdev->vq[n]);
657 }
658 
659 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
660 {
661     return vdev->vq[n].pa;
662 }
663 
664 int virtio_queue_get_num(VirtIODevice *vdev, int n)
665 {
666     return vdev->vq[n].vring.num;
667 }
668 
669 int virtio_queue_get_id(VirtQueue *vq)
670 {
671     VirtIODevice *vdev = vq->vdev;
672     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
673     return vq - &vdev->vq[0];
674 }
675 
676 void virtio_queue_notify_vq(VirtQueue *vq)
677 {
678     if (vq->vring.desc) {
679         VirtIODevice *vdev = vq->vdev;
680         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
681         vq->handle_output(vdev, vq);
682     }
683 }
684 
685 void virtio_queue_notify(VirtIODevice *vdev, int n)
686 {
687     virtio_queue_notify_vq(&vdev->vq[n]);
688 }
689 
690 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
691 {
692     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
693         VIRTIO_NO_VECTOR;
694 }
695 
696 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
697 {
698     if (n < VIRTIO_PCI_QUEUE_MAX)
699         vdev->vq[n].vector = vector;
700 }
701 
702 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
703                             void (*handle_output)(VirtIODevice *, VirtQueue *))
704 {
705     int i;
706 
707     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
708         if (vdev->vq[i].vring.num == 0)
709             break;
710     }
711 
712     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
713         abort();
714 
715     vdev->vq[i].vring.num = queue_size;
716     vdev->vq[i].handle_output = handle_output;
717 
718     return &vdev->vq[i];
719 }
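/* Illustrative call (not from this file): a device's init routine registers
 * its queues with something like
 *
 *     VirtQueue *rx_vq = virtio_add_queue(vdev, 256, handle_rx);
 *
 * where handle_rx is the device's own "guest kicked this queue" callback.
 * Asking for more than VIRTQUEUE_MAX_SIZE entries, or for more than
 * VIRTIO_PCI_QUEUE_MAX queues in total, aborts. */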
720 
721 void virtio_del_queue(VirtIODevice *vdev, int n)
722 {
723     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
724         abort();
725     }
726 
727     vdev->vq[n].vring.num = 0;
728 }
729 
730 void virtio_irq(VirtQueue *vq)
731 {
732     trace_virtio_irq(vq);
733     vq->vdev->isr |= 0x01;
734     virtio_notify_vector(vq->vdev, vq->vector);
735 }
736 
737 /* Assuming a given event_idx value from the other side, if
738  * we have just incremented index from old to new_idx,
739  * should we trigger an event? */
740 static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
741 {
742     /* Note: Xen has similar logic for notification hold-off
743      * in include/xen/interface/io/ring.h with req_event and req_prod
744      * corresponding to event_idx + 1 and new respectively.
745      * Note also that req_event and req_prod in Xen start at 1,
746      * event indexes in virtio start at 0. */
747     return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
748 }
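/* Worked example (illustrative): with event = 5, old = 4, new = 6 the test is
 * (uint16_t)(6 - 5 - 1) = 0 < (uint16_t)(6 - 4) = 2, so we notify: the index
 * just crossed the requested event value.  With new = 5 instead, the left
 * side wraps to 0xffff, the comparison fails and the notification is held
 * off.  Doing the comparison on unsigned 16-bit differences keeps the test
 * correct across index wrap-around. */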
749 
750 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
751 {
752     uint16_t old, new;
753     bool v;
754     /* We need to expose used array entries before checking used event. */
755     smp_mb();
756     /* Always notify when the queue is empty, if VIRTIO_F_NOTIFY_ON_EMPTY was acknowledged. */
757     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
758          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
759         return true;
760     }
761 
762     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
763         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
764     }
765 
766     v = vq->signalled_used_valid;
767     vq->signalled_used_valid = true;
768     old = vq->signalled_used;
769     new = vq->signalled_used = vring_used_idx(vq);
770     return !v || vring_need_event(vring_used_event(vq), new, old);
771 }
772 
773 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
774 {
775     if (!vring_notify(vdev, vq)) {
776         return;
777     }
778 
779     trace_virtio_notify(vdev, vq);
780     vdev->isr |= 0x01;
781     virtio_notify_vector(vdev, vq->vector);
782 }
783 
784 void virtio_notify_config(VirtIODevice *vdev)
785 {
786     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
787         return;
788 
789     vdev->isr |= 0x03;
790     virtio_notify_vector(vdev, vdev->config_vector);
791 }
792 
793 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
794 {
795     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
796     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
797     int i;
798 
799     if (k->save_config) {
800         k->save_config(qbus->parent, f);
801     }
802 
803     qemu_put_8s(f, &vdev->status);
804     qemu_put_8s(f, &vdev->isr);
805     qemu_put_be16s(f, &vdev->queue_sel);
806     qemu_put_be32s(f, &vdev->guest_features);
807     qemu_put_be32(f, vdev->config_len);
808     qemu_put_buffer(f, vdev->config, vdev->config_len);
809 
810     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
811         if (vdev->vq[i].vring.num == 0)
812             break;
813     }
814 
815     qemu_put_be32(f, i);
816 
817     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
818         if (vdev->vq[i].vring.num == 0)
819             break;
820 
821         qemu_put_be32(f, vdev->vq[i].vring.num);
822         qemu_put_be64(f, vdev->vq[i].pa);
823         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
824         if (k->save_queue) {
825             k->save_queue(qbus->parent, i, f);
826         }
827     }
828 }
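/* Resulting stream layout (before any per-queue data from the transport's
 * save_queue hook): optional transport config, status (1 byte), isr (1),
 * queue_sel (2), guest_features (4), config_len (4), the config blob, the
 * number of in-use queues (4), then for each queue: vring.num (4), ring
 * address (8) and last_avail_idx (2).  virtio_load() below consumes exactly
 * this order. */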
829 
830 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
831 {
832     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
833     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
834     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
835     uint32_t supported_features = vbusk->get_features(qbus->parent);
836     bool bad = (val & ~supported_features) != 0;
837 
838     val &= supported_features;
839     if (k->set_features) {
840         k->set_features(vdev, val);
841     }
842     vdev->guest_features = val;
843     return bad ? -1 : 0;
844 }
845 
846 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
847 {
848     int num, i, ret;
849     uint32_t features;
850     uint32_t supported_features;
851     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
852     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
853 
854     if (k->load_config) {
855         ret = k->load_config(qbus->parent, f);
856         if (ret)
857             return ret;
858     }
859 
860     qemu_get_8s(f, &vdev->status);
861     qemu_get_8s(f, &vdev->isr);
862     qemu_get_be16s(f, &vdev->queue_sel);
863     qemu_get_be32s(f, &features);
864 
865     if (virtio_set_features(vdev, features) < 0) {
866         supported_features = k->get_features(qbus->parent);
867         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
868                      features, supported_features);
869         return -1;
870     }
871     vdev->config_len = qemu_get_be32(f);
872     qemu_get_buffer(f, vdev->config, vdev->config_len);
873 
874     num = qemu_get_be32(f);
875 
876     for (i = 0; i < num; i++) {
877         vdev->vq[i].vring.num = qemu_get_be32(f);
878         vdev->vq[i].pa = qemu_get_be64(f);
879         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
880         vdev->vq[i].signalled_used_valid = false;
881         vdev->vq[i].notification = true;
882 
883         if (vdev->vq[i].pa) {
884             uint16_t nheads;
885             virtqueue_init(&vdev->vq[i]);
886             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
887             /* Check it isn't doing very strange things with descriptor numbers. */
888             if (nheads > vdev->vq[i].vring.num) {
889                 error_report("VQ %d size 0x%x Guest index 0x%x "
890                              "inconsistent with Host index 0x%x: delta 0x%x",
891                              i, vdev->vq[i].vring.num,
892                              vring_avail_idx(&vdev->vq[i]),
893                              vdev->vq[i].last_avail_idx, nheads);
894                 return -1;
895             }
896         } else if (vdev->vq[i].last_avail_idx) {
897             error_report("VQ %d address 0x0 "
898                          "inconsistent with Host index 0x%x",
899                          i, vdev->vq[i].last_avail_idx);
900             return -1;
901         }
902         if (k->load_queue) {
903             ret = k->load_queue(qbus->parent, i, f);
904             if (ret)
905                 return ret;
906         }
907     }
908 
909     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
910     return 0;
911 }
912 
913 void virtio_cleanup(VirtIODevice *vdev)
914 {
915     qemu_del_vm_change_state_handler(vdev->vmstate);
916     g_free(vdev->config);
917     g_free(vdev->vq);
918 }
919 
920 static void virtio_vmstate_change(void *opaque, int running, RunState state)
921 {
922     VirtIODevice *vdev = opaque;
923     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
924     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
925     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
926     vdev->vm_running = running;
927 
928     if (backend_run) {
929         virtio_set_status(vdev, vdev->status);
930     }
931 
932     if (k->vmstate_change) {
933         k->vmstate_change(qbus->parent, backend_run);
934     }
935 
936     if (!backend_run) {
937         virtio_set_status(vdev, vdev->status);
938     }
939 }
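/* Ordering note: when the backend is starting, virtio_set_status() runs
 * before the transport's vmstate_change hook; when it is stopping, the hook
 * runs first and the status callback last.  (Presumably so the transport's
 * start/stop handling brackets the device's own status callback.) */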
940 
941 void virtio_init(VirtIODevice *vdev, const char *name,
942                  uint16_t device_id, size_t config_size)
943 {
944     int i;
945     vdev->device_id = device_id;
946     vdev->status = 0;
947     vdev->isr = 0;
948     vdev->queue_sel = 0;
949     vdev->config_vector = VIRTIO_NO_VECTOR;
950     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
951     vdev->vm_running = runstate_is_running();
952     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
953         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
954         vdev->vq[i].vdev = vdev;
955         vdev->vq[i].queue_index = i;
956     }
957 
958     vdev->name = name;
959     vdev->config_len = config_size;
960     if (vdev->config_len) {
961         vdev->config = g_malloc0(config_size);
962     } else {
963         vdev->config = NULL;
964     }
965     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
966                                                      vdev);
967 }
968 
969 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
970 {
971     return vdev->vq[n].vring.desc;
972 }
973 
974 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
975 {
976     return vdev->vq[n].vring.avail;
977 }
978 
979 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
980 {
981     return vdev->vq[n].vring.used;
982 }
983 
984 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
985 {
986     return vdev->vq[n].vring.desc;
987 }
988 
989 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
990 {
991     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
992 }
993 
994 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
995 {
996     return offsetof(VRingAvail, ring) +
997         sizeof(uint16_t) * vdev->vq[n].vring.num;
998 }
999 
1000 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1001 {
1002     return offsetof(VRingUsed, ring) +
1003         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1004 }
1005 
1006 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1007 {
1008     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1009         virtio_queue_get_used_size(vdev, n);
1010 }
1011 
1012 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1013 {
1014     return vdev->vq[n].last_avail_idx;
1015 }
1016 
1017 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1018 {
1019     vdev->vq[n].last_avail_idx = idx;
1020 }
1021 
1022 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1023 {
1024     return vdev->vq + n;
1025 }
1026 
1027 uint16_t virtio_get_queue_index(VirtQueue *vq)
1028 {
1029     return vq->queue_index;
1030 }
1031 
1032 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1033 {
1034     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1035     if (event_notifier_test_and_clear(n)) {
1036         virtio_irq(vq);
1037     }
1038 }
1039 
1040 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1041                                                 bool with_irqfd)
1042 {
1043     if (assign && !with_irqfd) {
1044         event_notifier_set_handler(&vq->guest_notifier,
1045                                    virtio_queue_guest_notifier_read);
1046     } else {
1047         event_notifier_set_handler(&vq->guest_notifier, NULL);
1048     }
1049     if (!assign) {
1050         /* Test and clear notifier before closing it,
1051          * in case poll callback didn't have time to run. */
1052         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1053     }
1054 }
1055 
1056 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1057 {
1058     return &vq->guest_notifier;
1059 }
1060 
1061 static void virtio_queue_host_notifier_read(EventNotifier *n)
1062 {
1063     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1064     if (event_notifier_test_and_clear(n)) {
1065         virtio_queue_notify_vq(vq);
1066     }
1067 }
1068 
1069 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1070                                                bool set_handler)
1071 {
1072     if (assign && set_handler) {
1073         event_notifier_set_handler(&vq->host_notifier,
1074                                    virtio_queue_host_notifier_read);
1075     } else {
1076         event_notifier_set_handler(&vq->host_notifier, NULL);
1077     }
1078     if (!assign) {
1079     /* Test and clear notifier after disabling event,
1080          * in case poll callback didn't have time to run. */
1081         virtio_queue_host_notifier_read(&vq->host_notifier);
1082     }
1083 }
1084 
1085 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1086 {
1087     return &vq->host_notifier;
1088 }
1089 
1090 static int virtio_device_init(DeviceState *qdev)
1091 {
1092     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1093     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
1094     assert(k->init != NULL);
1095     if (k->init(vdev) < 0) {
1096         return -1;
1097     }
1098     virtio_bus_plug_device(vdev);
1099     return 0;
1100 }
1101 
1102 static void virtio_device_class_init(ObjectClass *klass, void *data)
1103 {
1104     /* Set the defaults shared by all virtio devices: the init hook and the bus type. */
1105     DeviceClass *dc = DEVICE_CLASS(klass);
1106     dc->init = virtio_device_init;
1107     dc->bus_type = TYPE_VIRTIO_BUS;
1108 }
1109 
1110 static const TypeInfo virtio_device_info = {
1111     .name = TYPE_VIRTIO_DEVICE,
1112     .parent = TYPE_DEVICE,
1113     .instance_size = sizeof(VirtIODevice),
1114     .class_init = virtio_device_class_init,
1115     .abstract = true,
1116     .class_size = sizeof(VirtioDeviceClass),
1117 };
1118 
1119 static void virtio_register_types(void)
1120 {
1121     type_register_static(&virtio_device_info);
1122 }
1123 
1124 type_init(virtio_register_types)
1125