xref: /openbmc/qemu/hw/virtio/virtio.c (revision b8bcf811)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "exec/address-spaces.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/virtio.h"
20 #include "qemu/atomic.h"
21 #include "hw/virtio/virtio-bus.h"
22 
23 /*
24  * The alignment to use between consumer and producer parts of vring.
25  * x86 pagesize again. This is the default, used by transports like PCI
26  * which don't provide a means for the guest to tell the host the alignment.
27  */
28 #define VIRTIO_PCI_VRING_ALIGN         4096
29 
30 typedef struct VRingDesc
31 {
32     uint64_t addr;
33     uint32_t len;
34     uint16_t flags;
35     uint16_t next;
36 } VRingDesc;
37 
38 typedef struct VRingAvail
39 {
40     uint16_t flags;
41     uint16_t idx;
42     uint16_t ring[0];
43 } VRingAvail;
44 
45 typedef struct VRingUsedElem
46 {
47     uint32_t id;
48     uint32_t len;
49 } VRingUsedElem;
50 
51 typedef struct VRingUsed
52 {
53     uint16_t flags;
54     uint16_t idx;
55     VRingUsedElem ring[0];
56 } VRingUsed;
57 
58 typedef struct VRing
59 {
60     unsigned int num;
61     unsigned int align;
62     hwaddr desc;
63     hwaddr avail;
64     hwaddr used;
65 } VRing;
66 
67 struct VirtQueue
68 {
69     VRing vring;
70     hwaddr pa;
71     uint16_t last_avail_idx;
72     /* Last used index value we have signalled on */
73     uint16_t signalled_used;
74 
75     /* Whether signalled_used above is valid */
76     bool signalled_used_valid;
77 
78     /* Notification enabled? */
79     bool notification;
80 
81     uint16_t queue_index;
82 
83     int inuse;
84 
85     uint16_t vector;
86     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
87     VirtIODevice *vdev;
88     EventNotifier guest_notifier;
89     EventNotifier host_notifier;
90 };
91 
92 /* virt queue functions */
93 static void virtqueue_init(VirtQueue *vq)
94 {
95     hwaddr pa = vq->pa;
96 
97     vq->vring.desc = pa;
98     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
99     vq->vring.used = vring_align(vq->vring.avail +
100                                  offsetof(VRingAvail, ring[vq->vring.num]),
101                                  vq->vring.align);
102 }
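
/*
 * Layout example for the computation above: with vring.num = 256 and
 * vring.align = 4096, the descriptor table takes 256 * 16 = 4096 bytes
 * starting at pa, the available ring starts at pa + 4096 and occupies
 * offsetof(VRingAvail, ring[256]) = 4 + 2 * 256 = 516 bytes, and the used
 * ring is rounded up to the next 4096-byte boundary, i.e. pa + 8192.
 */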
103 
104 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
105 {
106     hwaddr pa;
107     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
108     return ldq_phys(&address_space_memory, pa);
109 }
110 
111 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
112 {
113     hwaddr pa;
114     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
115     return ldl_phys(&address_space_memory, pa);
116 }
117 
118 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
119 {
120     hwaddr pa;
121     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
122     return lduw_phys(&address_space_memory, pa);
123 }
124 
125 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
126 {
127     hwaddr pa;
128     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
129     return lduw_phys(&address_space_memory, pa);
130 }
131 
132 static inline uint16_t vring_avail_flags(VirtQueue *vq)
133 {
134     hwaddr pa;
135     pa = vq->vring.avail + offsetof(VRingAvail, flags);
136     return lduw_phys(&address_space_memory, pa);
137 }
138 
139 static inline uint16_t vring_avail_idx(VirtQueue *vq)
140 {
141     hwaddr pa;
142     pa = vq->vring.avail + offsetof(VRingAvail, idx);
143     return lduw_phys(&address_space_memory, pa);
144 }
145 
146 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
147 {
148     hwaddr pa;
149     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
150     return lduw_phys(&address_space_memory, pa);
151 }
152 
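/*
 * When VIRTIO_RING_F_EVENT_IDX is negotiated, the guest publishes its
 * "used_event" threshold in the slot just past the available ring
 * (avail->ring[num]); the helper below reads it.
 */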
153 static inline uint16_t vring_used_event(VirtQueue *vq)
154 {
155     return vring_avail_ring(vq, vq->vring.num);
156 }
157 
158 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
159 {
160     hwaddr pa;
161     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
162     stl_phys(&address_space_memory, pa, val);
163 }
164 
165 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
166 {
167     hwaddr pa;
168     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
169     stl_phys(&address_space_memory, pa, val);
170 }
171 
172 static uint16_t vring_used_idx(VirtQueue *vq)
173 {
174     hwaddr pa;
175     pa = vq->vring.used + offsetof(VRingUsed, idx);
176     return lduw_phys(&address_space_memory, pa);
177 }
178 
179 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
180 {
181     hwaddr pa;
182     pa = vq->vring.used + offsetof(VRingUsed, idx);
183     stw_phys(&address_space_memory, pa, val);
184 }
185 
186 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
187 {
188     hwaddr pa;
189     pa = vq->vring.used + offsetof(VRingUsed, flags);
190     stw_phys(&address_space_memory,
191              pa, lduw_phys(&address_space_memory, pa) | mask);
192 }
193 
194 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
195 {
196     hwaddr pa;
197     pa = vq->vring.used + offsetof(VRingUsed, flags);
198     stw_phys(&address_space_memory,
199              pa, lduw_phys(&address_space_memory, pa) & ~mask);
200 }
201 
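/*
 * Counterpart of vring_used_event(): the device publishes its
 * "avail_event" index in the slot just past the used ring
 * (used->ring[num]) to tell the guest when the next kick is wanted.
 */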
202 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
203 {
204     hwaddr pa;
205     if (!vq->notification) {
206         return;
207     }
208     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
209     stw_phys(&address_space_memory, pa, val);
210 }
211 
212 void virtio_queue_set_notification(VirtQueue *vq, int enable)
213 {
214     vq->notification = enable;
215     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
216         vring_avail_event(vq, vring_avail_idx(vq));
217     } else if (enable) {
218         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
219     } else {
220         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
221     }
222     if (enable) {
223         /* Expose avail event/used flags before caller checks the avail idx. */
224         smp_mb();
225     }
226 }
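
/*
 * Typical use (illustrative only): an output handler disables
 * notifications, pops and services buffers until the ring is empty,
 * re-enables notifications, and then re-checks virtio_queue_empty() to
 * close the race with a buffer that was added in the meantime.
 */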
227 
228 int virtio_queue_ready(VirtQueue *vq)
229 {
230     return vq->vring.avail != 0;
231 }
232 
233 int virtio_queue_empty(VirtQueue *vq)
234 {
235     return vring_avail_idx(vq) == vq->last_avail_idx;
236 }
237 
238 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
239                     unsigned int len, unsigned int idx)
240 {
241     unsigned int offset;
242     int i;
243 
244     trace_virtqueue_fill(vq, elem, len, idx);
245 
246     offset = 0;
247     for (i = 0; i < elem->in_num; i++) {
248         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
249 
250         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
251                                   elem->in_sg[i].iov_len,
252                                   1, size);
253 
254         offset += size;
255     }
256 
257     for (i = 0; i < elem->out_num; i++)
258         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
259                                   elem->out_sg[i].iov_len,
260                                   0, elem->out_sg[i].iov_len);
261 
262     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
263 
264     /* Fill in the next entry of the used ring. */
265     vring_used_ring_id(vq, idx, elem->index);
266     vring_used_ring_len(vq, idx, len);
267 }
268 
269 void virtqueue_flush(VirtQueue *vq, unsigned int count)
270 {
271     uint16_t old, new;
272     /* Make sure buffer is written before we update index. */
273     smp_wmb();
274     trace_virtqueue_flush(vq, count);
275     old = vring_used_idx(vq);
276     new = old + count;
277     vring_used_idx_set(vq, new);
278     vq->inuse -= count;
279     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
280         vq->signalled_used_valid = false;
281 }
282 
283 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
284                     unsigned int len)
285 {
286     virtqueue_fill(vq, elem, len, 0);
287     virtqueue_flush(vq, 1);
288 }
289 
290 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
291 {
292     uint16_t num_heads = vring_avail_idx(vq) - idx;
293 
294     /* Check it isn't doing very strange things with descriptor numbers. */
295     if (num_heads > vq->vring.num) {
296         error_report("Guest moved used index from %u to %u",
297                      idx, vring_avail_idx(vq));
298         exit(1);
299     }
300     /* On success, callers read a descriptor at vq->last_avail_idx.
301      * Make sure descriptor read does not bypass avail index read. */
302     if (num_heads) {
303         smp_rmb();
304     }
305 
306     return num_heads;
307 }
308 
309 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
310 {
311     unsigned int head;
312 
313     /* Grab the next descriptor number they're advertising, and increment
314      * the index we've seen. */
315     head = vring_avail_ring(vq, idx % vq->vring.num);
316 
317     /* If their number is silly, that's a fatal mistake. */
318     if (head >= vq->vring.num) {
319         error_report("Guest says index %u is available", head);
320         exit(1);
321     }
322 
323     return head;
324 }
325 
326 static unsigned virtqueue_next_desc(hwaddr desc_pa,
327                                     unsigned int i, unsigned int max)
328 {
329     unsigned int next;
330 
331     /* If this descriptor says it doesn't chain, we're done. */
332     if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
333         return max;
334 
335     /* Check they're not leading us off end of descriptors. */
336     next = vring_desc_next(desc_pa, i);
337     /* Make sure compiler knows to grab that: we don't want it changing! */
338     smp_wmb();
339 
340     if (next >= max) {
341         error_report("Desc next is %u", next);
342         exit(1);
343     }
344 
345     return next;
346 }
347 
348 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
349                                unsigned int *out_bytes,
350                                unsigned max_in_bytes, unsigned max_out_bytes)
351 {
352     unsigned int idx;
353     unsigned int total_bufs, in_total, out_total;
354 
355     idx = vq->last_avail_idx;
356 
357     total_bufs = in_total = out_total = 0;
358     while (virtqueue_num_heads(vq, idx)) {
359         unsigned int max, num_bufs, indirect = 0;
360         hwaddr desc_pa;
361         int i;
362 
363         max = vq->vring.num;
364         num_bufs = total_bufs;
365         i = virtqueue_get_head(vq, idx++);
366         desc_pa = vq->vring.desc;
367 
368         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
369             if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
370                 error_report("Invalid size for indirect buffer table");
371                 exit(1);
372             }
373 
374             /* If we've got too many, that implies a descriptor loop. */
375             if (num_bufs >= max) {
376                 error_report("Looped descriptor");
377                 exit(1);
378             }
379 
380             /* loop over the indirect descriptor table */
381             indirect = 1;
382             max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
383             desc_pa = vring_desc_addr(desc_pa, i);
384             num_bufs = i = 0;
385         }
386 
387         do {
388             /* If we've got too many, that implies a descriptor loop. */
389             if (++num_bufs > max) {
390                 error_report("Looped descriptor");
391                 exit(1);
392             }
393 
394             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
395                 in_total += vring_desc_len(desc_pa, i);
396             } else {
397                 out_total += vring_desc_len(desc_pa, i);
398             }
399             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
400                 goto done;
401             }
402         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
403 
404         if (!indirect)
405             total_bufs = num_bufs;
406         else
407             total_bufs++;
408     }
409 done:
410     if (in_bytes) {
411         *in_bytes = in_total;
412     }
413     if (out_bytes) {
414         *out_bytes = out_total;
415     }
416 }
417 
418 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
419                           unsigned int out_bytes)
420 {
421     unsigned int in_total, out_total;
422 
423     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
424     return in_bytes <= in_total && out_bytes <= out_total;
425 }
426 
427 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
428     size_t num_sg, int is_write)
429 {
430     unsigned int i;
431     hwaddr len;
432 
433     for (i = 0; i < num_sg; i++) {
434         len = sg[i].iov_len;
435         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
436         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
437             error_report("virtio: trying to map MMIO memory");
438             exit(1);
439         }
440     }
441 }
442 
443 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
444 {
445     unsigned int i, head, max;
446     hwaddr desc_pa = vq->vring.desc;
447 
448     if (!virtqueue_num_heads(vq, vq->last_avail_idx))
449         return 0;
450 
451     /* When we start there are no input or output buffers. */
452     elem->out_num = elem->in_num = 0;
453 
454     max = vq->vring.num;
455 
456     i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
457     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
458         vring_avail_event(vq, vring_avail_idx(vq));
459     }
460 
461     if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
462         if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
463             error_report("Invalid size for indirect buffer table");
464             exit(1);
465         }
466 
467         /* loop over the indirect descriptor table */
468         max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
469         desc_pa = vring_desc_addr(desc_pa, i);
470         i = 0;
471     }
472 
473     /* Collect all the descriptors */
474     do {
475         struct iovec *sg;
476 
477         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
478             if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
479                 error_report("Too many write descriptors in indirect table");
480                 exit(1);
481             }
482             elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
483             sg = &elem->in_sg[elem->in_num++];
484         } else {
485             if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
486                 error_report("Too many read descriptors in indirect table");
487                 exit(1);
488             }
489             elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
490             sg = &elem->out_sg[elem->out_num++];
491         }
492 
493         sg->iov_len = vring_desc_len(desc_pa, i);
494 
495         /* If we've got too many, that implies a descriptor loop. */
496         if ((elem->in_num + elem->out_num) > max) {
497             error_report("Looped descriptor");
498             exit(1);
499         }
500     } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
501 
502     /* Now map what we have collected */
503     virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
504     virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
505 
506     elem->index = head;
507 
508     vq->inuse++;
509 
510     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
511     return elem->in_num + elem->out_num;
512 }
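
/*
 * Illustrative caller flow; process_request() is a hypothetical helper,
 * not part of this file:
 *
 *     VirtQueueElement elem;
 *     while (virtqueue_pop(vq, &elem)) {
 *         // request data in elem.out_sg, response space in elem.in_sg
 *         size_t written = process_request(&elem);
 *         virtqueue_push(vq, &elem, written);
 *     }
 *     virtio_notify(vdev, vq);
 */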
513 
514 /* virtio device */
515 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
516 {
517     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
518     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
519 
520     if (k->notify) {
521         k->notify(qbus->parent, vector);
522     }
523 }
524 
525 void virtio_update_irq(VirtIODevice *vdev)
526 {
527     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
528 }
529 
530 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
531 {
532     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
533     trace_virtio_set_status(vdev, val);
534 
535     if (k->set_status) {
536         k->set_status(vdev, val);
537     }
538     vdev->status = val;
539 }
540 
541 void virtio_reset(void *opaque)
542 {
543     VirtIODevice *vdev = opaque;
544     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
545     int i;
546 
547     virtio_set_status(vdev, 0);
548 
549     if (k->reset) {
550         k->reset(vdev);
551     }
552 
553     vdev->guest_features = 0;
554     vdev->queue_sel = 0;
555     vdev->status = 0;
556     vdev->isr = 0;
557     vdev->config_vector = VIRTIO_NO_VECTOR;
558     virtio_notify_vector(vdev, vdev->config_vector);
559 
560     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
561         vdev->vq[i].vring.desc = 0;
562         vdev->vq[i].vring.avail = 0;
563         vdev->vq[i].vring.used = 0;
564         vdev->vq[i].last_avail_idx = 0;
565         vdev->vq[i].pa = 0;
566         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
567         vdev->vq[i].signalled_used = 0;
568         vdev->vq[i].signalled_used_valid = false;
569         vdev->vq[i].notification = true;
570     }
571 }
572 
573 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
574 {
575     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
576     uint8_t val;
577 
578     if (addr + sizeof(val) > vdev->config_len) {
579         return (uint32_t)-1;
580     }
581 
582     k->get_config(vdev, vdev->config);
583 
584     val = ldub_p(vdev->config + addr);
585     return val;
586 }
587 
588 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
589 {
590     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
591     uint16_t val;
592 
593     if (addr + sizeof(val) > vdev->config_len) {
594         return (uint32_t)-1;
595     }
596 
597     k->get_config(vdev, vdev->config);
598 
599     val = lduw_p(vdev->config + addr);
600     return val;
601 }
602 
603 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
604 {
605     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
606     uint32_t val;
607 
608     if (addr + sizeof(val) > vdev->config_len) {
609         return (uint32_t)-1;
610     }
611 
612     k->get_config(vdev, vdev->config);
613 
614     val = ldl_p(vdev->config + addr);
615     return val;
616 }
617 
618 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
619 {
620     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
621     uint8_t val = data;
622 
623     if (addr + sizeof(val) > vdev->config_len) {
624         return;
625     }
626 
627     stb_p(vdev->config + addr, val);
628 
629     if (k->set_config) {
630         k->set_config(vdev, vdev->config);
631     }
632 }
633 
634 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
635 {
636     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
637     uint16_t val = data;
638 
639     if (addr + sizeof(val) > vdev->config_len) {
640         return;
641     }
642 
643     stw_p(vdev->config + addr, val);
644 
645     if (k->set_config) {
646         k->set_config(vdev, vdev->config);
647     }
648 }
649 
650 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
651 {
652     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
653     uint32_t val = data;
654 
655     if (addr + sizeof(val) > vdev->config_len) {
656         return;
657     }
658 
659     stl_p(vdev->config + addr, val);
660 
661     if (k->set_config) {
662         k->set_config(vdev, vdev->config);
663     }
664 }
665 
666 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
667 {
668     vdev->vq[n].pa = addr;
669     virtqueue_init(&vdev->vq[n]);
670 }
671 
672 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
673 {
674     return vdev->vq[n].pa;
675 }
676 
677 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
678 {
679     /* Don't allow guest to flip queue between existent and
680      * nonexistent states, or to set it to an invalid size.
681      */
682     if (!!num != !!vdev->vq[n].vring.num ||
683         num > VIRTQUEUE_MAX_SIZE ||
684         num < 0) {
685         return;
686     }
687     vdev->vq[n].vring.num = num;
688     virtqueue_init(&vdev->vq[n]);
689 }
690 
691 int virtio_queue_get_num(VirtIODevice *vdev, int n)
692 {
693     return vdev->vq[n].vring.num;
694 }
695 
696 int virtio_queue_get_id(VirtQueue *vq)
697 {
698     VirtIODevice *vdev = vq->vdev;
699     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
700     return vq - &vdev->vq[0];
701 }
702 
703 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
704 {
705     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
706     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
707 
708     /* Check that the transport told us it was going to do this
709      * (so a buggy transport will immediately assert rather than
710      * silently failing to migrate this state)
711      */
712     assert(k->has_variable_vring_alignment);
713 
714     vdev->vq[n].vring.align = align;
715     virtqueue_init(&vdev->vq[n]);
716 }
717 
718 void virtio_queue_notify_vq(VirtQueue *vq)
719 {
720     if (vq->vring.desc) {
721         VirtIODevice *vdev = vq->vdev;
722         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
723         vq->handle_output(vdev, vq);
724     }
725 }
726 
727 void virtio_queue_notify(VirtIODevice *vdev, int n)
728 {
729     virtio_queue_notify_vq(&vdev->vq[n]);
730 }
731 
732 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
733 {
734     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
735         VIRTIO_NO_VECTOR;
736 }
737 
738 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
739 {
740     if (n < VIRTIO_PCI_QUEUE_MAX)
741         vdev->vq[n].vector = vector;
742 }
743 
744 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
745                             void (*handle_output)(VirtIODevice *, VirtQueue *))
746 {
747     int i;
748 
749     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
750         if (vdev->vq[i].vring.num == 0)
751             break;
752     }
753 
754     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
755         abort();
756 
757     vdev->vq[i].vring.num = queue_size;
758     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
759     vdev->vq[i].handle_output = handle_output;
760 
761     return &vdev->vq[i];
762 }
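
/*
 * Illustrative use (handle_rx is a hypothetical handler, not part of this
 * file): a device creates its queues at realize time, e.g.
 *
 *     vq = virtio_add_queue(vdev, 256, handle_rx);
 *
 * The requested size must not exceed VIRTQUEUE_MAX_SIZE.
 */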
763 
764 void virtio_del_queue(VirtIODevice *vdev, int n)
765 {
766     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
767         abort();
768     }
769 
770     vdev->vq[n].vring.num = 0;
771 }
772 
773 void virtio_irq(VirtQueue *vq)
774 {
775     trace_virtio_irq(vq);
776     vq->vdev->isr |= 0x01;
777     virtio_notify_vector(vq->vdev, vq->vector);
778 }
779 
780 /* Assuming a given event_idx value from the other side, if
781  * we have just incremented index from old to new,
782  * should we trigger an event? */
783 static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
784 {
785     /* Note: Xen has similar logic for notification hold-off
786      * in include/xen/interface/io/ring.h with req_event and req_prod
787      * corresponding to event_idx + 1 and new respectively.
788      * Note also that req_event and req_prod in Xen start at 1,
789      * event indexes in virtio start at 0. */
790     return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
791 }
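
/*
 * Example for the check above: with old = 1, new = 3 and event = 2,
 * (uint16_t)(new - event - 1) = 0 is less than (uint16_t)(new - old) = 2,
 * so the update crossed the other side's threshold and an event is
 * needed; with event = 5 the left-hand side wraps to 65533 and no event
 * is sent.
 */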
792 
793 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
794 {
795     uint16_t old, new;
796     bool v;
797     /* We need to expose used array entries before checking used event. */
798     smp_mb();
799     /* Always notify when queue is empty (if the feature has been acknowledged) */
800     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
801          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
802         return true;
803     }
804 
805     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
806         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
807     }
808 
809     v = vq->signalled_used_valid;
810     vq->signalled_used_valid = true;
811     old = vq->signalled_used;
812     new = vq->signalled_used = vring_used_idx(vq);
813     return !v || vring_need_event(vring_used_event(vq), new, old);
814 }
815 
816 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
817 {
818     if (!vring_notify(vdev, vq)) {
819         return;
820     }
821 
822     trace_virtio_notify(vdev, vq);
823     vdev->isr |= 0x01;
824     virtio_notify_vector(vdev, vq->vector);
825 }
826 
827 void virtio_notify_config(VirtIODevice *vdev)
828 {
829     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
830         return;
831 
832     vdev->isr |= 0x03;
833     virtio_notify_vector(vdev, vdev->config_vector);
834 }
835 
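/*
 * The stream written below is: transport config (if any), status, isr,
 * queue_sel, guest_features, config_len, the config blob, the number of
 * in-use queues, and per queue: size, optional alignment, ring address,
 * last_avail_idx and any transport per-queue state. virtio_load() must
 * consume it in exactly this order.
 */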
836 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
837 {
838     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
839     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
840     int i;
841 
842     if (k->save_config) {
843         k->save_config(qbus->parent, f);
844     }
845 
846     qemu_put_8s(f, &vdev->status);
847     qemu_put_8s(f, &vdev->isr);
848     qemu_put_be16s(f, &vdev->queue_sel);
849     qemu_put_be32s(f, &vdev->guest_features);
850     qemu_put_be32(f, vdev->config_len);
851     qemu_put_buffer(f, vdev->config, vdev->config_len);
852 
853     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
854         if (vdev->vq[i].vring.num == 0)
855             break;
856     }
857 
858     qemu_put_be32(f, i);
859 
860     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
861         if (vdev->vq[i].vring.num == 0)
862             break;
863 
864         qemu_put_be32(f, vdev->vq[i].vring.num);
865         if (k->has_variable_vring_alignment) {
866             qemu_put_be32(f, vdev->vq[i].vring.align);
867         }
868         qemu_put_be64(f, vdev->vq[i].pa);
869         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
870         if (k->save_queue) {
871             k->save_queue(qbus->parent, i, f);
872         }
873     }
874 }
875 
876 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
877 {
878     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
879     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
880     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
881     uint32_t supported_features = vbusk->get_features(qbus->parent);
882     bool bad = (val & ~supported_features) != 0;
883 
884     val &= supported_features;
885     if (k->set_features) {
886         k->set_features(vdev, val);
887     }
888     vdev->guest_features = val;
889     return bad ? -1 : 0;
890 }
891 
892 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
893 {
894     int num, i, ret;
895     uint32_t features;
896     uint32_t supported_features;
897     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
898     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
899 
900     if (k->load_config) {
901         ret = k->load_config(qbus->parent, f);
902         if (ret)
903             return ret;
904     }
905 
906     qemu_get_8s(f, &vdev->status);
907     qemu_get_8s(f, &vdev->isr);
908     qemu_get_be16s(f, &vdev->queue_sel);
909     qemu_get_be32s(f, &features);
910 
911     if (virtio_set_features(vdev, features) < 0) {
912         supported_features = k->get_features(qbus->parent);
913         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
914                      features, supported_features);
915         return -1;
916     }
917     vdev->config_len = qemu_get_be32(f);
918     qemu_get_buffer(f, vdev->config, vdev->config_len);
919 
920     num = qemu_get_be32(f);
921 
922     for (i = 0; i < num; i++) {
923         vdev->vq[i].vring.num = qemu_get_be32(f);
924         if (k->has_variable_vring_alignment) {
925             vdev->vq[i].vring.align = qemu_get_be32(f);
926         }
927         vdev->vq[i].pa = qemu_get_be64(f);
928         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
929         vdev->vq[i].signalled_used_valid = false;
930         vdev->vq[i].notification = true;
931 
932         if (vdev->vq[i].pa) {
933             uint16_t nheads;
934             virtqueue_init(&vdev->vq[i]);
935             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
936             /* Check it isn't doing very strange things with descriptor numbers. */
937             if (nheads > vdev->vq[i].vring.num) {
938                 error_report("VQ %d size 0x%x Guest index 0x%x "
939                              "inconsistent with Host index 0x%x: delta 0x%x",
940                              i, vdev->vq[i].vring.num,
941                              vring_avail_idx(&vdev->vq[i]),
942                              vdev->vq[i].last_avail_idx, nheads);
943                 return -1;
944             }
945         } else if (vdev->vq[i].last_avail_idx) {
946             error_report("VQ %d address 0x0 "
947                          "inconsistent with Host index 0x%x",
948                          i, vdev->vq[i].last_avail_idx);
949             return -1;
950         }
951         if (k->load_queue) {
952             ret = k->load_queue(qbus->parent, i, f);
953             if (ret)
954                 return ret;
955         }
956     }
957 
958     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
959     return 0;
960 }
961 
962 void virtio_cleanup(VirtIODevice *vdev)
963 {
964     qemu_del_vm_change_state_handler(vdev->vmstate);
965     g_free(vdev->config);
966     g_free(vdev->vq);
967 }
968 
969 static void virtio_vmstate_change(void *opaque, int running, RunState state)
970 {
971     VirtIODevice *vdev = opaque;
972     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
973     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
974     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
975     vdev->vm_running = running;
976 
977     if (backend_run) {
978         virtio_set_status(vdev, vdev->status);
979     }
980 
981     if (k->vmstate_change) {
982         k->vmstate_change(qbus->parent, backend_run);
983     }
984 
985     if (!backend_run) {
986         virtio_set_status(vdev, vdev->status);
987     }
988 }
989 
990 void virtio_init(VirtIODevice *vdev, const char *name,
991                  uint16_t device_id, size_t config_size)
992 {
993     int i;
994     vdev->device_id = device_id;
995     vdev->status = 0;
996     vdev->isr = 0;
997     vdev->queue_sel = 0;
998     vdev->config_vector = VIRTIO_NO_VECTOR;
999     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
1000     vdev->vm_running = runstate_is_running();
1001     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
1002         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1003         vdev->vq[i].vdev = vdev;
1004         vdev->vq[i].queue_index = i;
1005     }
1006 
1007     vdev->name = name;
1008     vdev->config_len = config_size;
1009     if (vdev->config_len) {
1010         vdev->config = g_malloc0(config_size);
1011     } else {
1012         vdev->config = NULL;
1013     }
1014     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1015                                                      vdev);
1016 }
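
/*
 * Illustrative realize-time use (the device name, ID and config struct
 * are hypothetical, not part of this file):
 *
 *     virtio_init(vdev, "my-device", VIRTIO_ID_MY_DEVICE,
 *                 sizeof(struct my_config));
 *     vq = virtio_add_queue(vdev, 128, handle_vq);
 */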
1017 
1018 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1019 {
1020     return vdev->vq[n].vring.desc;
1021 }
1022 
1023 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1024 {
1025     return vdev->vq[n].vring.avail;
1026 }
1027 
1028 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1029 {
1030     return vdev->vq[n].vring.used;
1031 }
1032 
1033 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1034 {
1035     return vdev->vq[n].vring.desc;
1036 }
1037 
1038 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1039 {
1040     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1041 }
1042 
1043 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1044 {
1045     return offsetof(VRingAvail, ring) +
1046         sizeof(uint16_t) * vdev->vq[n].vring.num;
1047 }
1048 
1049 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1050 {
1051     return offsetof(VRingUsed, ring) +
1052         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1053 }
1054 
1055 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1056 {
1057     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1058         virtio_queue_get_used_size(vdev, n);
1059 }
1060 
1061 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1062 {
1063     return vdev->vq[n].last_avail_idx;
1064 }
1065 
1066 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1067 {
1068     vdev->vq[n].last_avail_idx = idx;
1069 }
1070 
1071 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1072 {
1073     vdev->vq[n].signalled_used_valid = false;
1074 }
1075 
1076 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1077 {
1078     return vdev->vq + n;
1079 }
1080 
1081 uint16_t virtio_get_queue_index(VirtQueue *vq)
1082 {
1083     return vq->queue_index;
1084 }
1085 
1086 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1087 {
1088     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1089     if (event_notifier_test_and_clear(n)) {
1090         virtio_irq(vq);
1091     }
1092 }
1093 
1094 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1095                                                 bool with_irqfd)
1096 {
1097     if (assign && !with_irqfd) {
1098         event_notifier_set_handler(&vq->guest_notifier,
1099                                    virtio_queue_guest_notifier_read);
1100     } else {
1101         event_notifier_set_handler(&vq->guest_notifier, NULL);
1102     }
1103     if (!assign) {
1104         /* Test and clear notifier before closing it,
1105          * in case poll callback didn't have time to run. */
1106         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1107     }
1108 }
1109 
1110 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1111 {
1112     return &vq->guest_notifier;
1113 }
1114 
1115 static void virtio_queue_host_notifier_read(EventNotifier *n)
1116 {
1117     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1118     if (event_notifier_test_and_clear(n)) {
1119         virtio_queue_notify_vq(vq);
1120     }
1121 }
1122 
1123 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1124                                                bool set_handler)
1125 {
1126     if (assign && set_handler) {
1127         event_notifier_set_handler(&vq->host_notifier,
1128                                    virtio_queue_host_notifier_read);
1129     } else {
1130         event_notifier_set_handler(&vq->host_notifier, NULL);
1131     }
1132     if (!assign) {
1133         /* Test and clear notifier after disabling event,
1134          * in case poll callback didn't have time to run. */
1135         virtio_queue_host_notifier_read(&vq->host_notifier);
1136     }
1137 }
1138 
1139 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1140 {
1141     return &vq->host_notifier;
1142 }
1143 
1144 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1145 {
1146     if (vdev->bus_name) {
1147         g_free(vdev->bus_name);
1148         vdev->bus_name = NULL;
1149     }
1150 
1151     if (bus_name) {
1152         vdev->bus_name = g_strdup(bus_name);
1153     }
1154 }
1155 
1156 static void virtio_device_realize(DeviceState *dev, Error **errp)
1157 {
1158     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1159     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1160     Error *err = NULL;
1161 
1162     if (vdc->realize != NULL) {
1163         vdc->realize(dev, &err);
1164         if (err != NULL) {
1165             error_propagate(errp, err);
1166             return;
1167         }
1168     }
1169     virtio_bus_device_plugged(vdev);
1170 }
1171 
1172 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1173 {
1174     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1175     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1176     Error *err = NULL;
1177 
1178     virtio_bus_device_unplugged(vdev);
1179 
1180     if (vdc->unrealize != NULL) {
1181         vdc->unrealize(dev, &err);
1182         if (err != NULL) {
1183             error_propagate(errp, err);
1184             return;
1185         }
1186     }
1187 
1188     if (vdev->bus_name) {
1189         g_free(vdev->bus_name);
1190         vdev->bus_name = NULL;
1191     }
1192 }
1193 
1194 static void virtio_device_class_init(ObjectClass *klass, void *data)
1195 {
1196     /* Set the default value here. */
1197     DeviceClass *dc = DEVICE_CLASS(klass);
1198 
1199     dc->realize = virtio_device_realize;
1200     dc->unrealize = virtio_device_unrealize;
1201     dc->bus_type = TYPE_VIRTIO_BUS;
1202 }
1203 
1204 static const TypeInfo virtio_device_info = {
1205     .name = TYPE_VIRTIO_DEVICE,
1206     .parent = TYPE_DEVICE,
1207     .instance_size = sizeof(VirtIODevice),
1208     .class_init = virtio_device_class_init,
1209     .abstract = true,
1210     .class_size = sizeof(VirtioDeviceClass),
1211 };
1212 
1213 static void virtio_register_types(void)
1214 {
1215     type_register_static(&virtio_device_info);
1216 }
1217 
1218 type_init(virtio_register_types)
1219