xref: /openbmc/qemu/hw/virtio/virtio.c (revision 5692399f)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "exec/address-spaces.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/virtio.h"
20 #include "qemu/atomic.h"
21 #include "hw/virtio/virtio-bus.h"
22 #include "migration/migration.h"
23 #include "hw/virtio/virtio-access.h"
24 
/*
 * The alignment to use between consumer and producer parts of vring.
 * The value matches the x86 page size. This is the default, used by
 * transports like PCI which don't provide a means for the guest to tell
 * the host the alignment.
 */
30 #define VIRTIO_PCI_VRING_ALIGN         4096
31 
/*
 * One entry of a vring descriptor table as laid out in guest memory.
 * Fields are never accessed through this struct directly; the
 * vring_desc_*() helpers use it only for sizeof()/offsetof().
 */
typedef struct VRingDesc {
    uint64_t addr;      /* loaded by vring_desc_addr() */
    uint32_t len;       /* loaded by vring_desc_len() */
    uint16_t flags;     /* VRING_DESC_F_* bits, loaded by vring_desc_flags() */
    uint16_t next;      /* index of the chained descriptor, see vring_desc_next() */
} VRingDesc;
39 
/*
 * Header of the available ring as laid out in guest memory.  The struct
 * is used only for offsetof() computations by the vring_avail_*()
 * helpers.  The ring entries follow the header; a C99 flexible array
 * member is used instead of the non-standard zero-length array.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
46 
/*
 * One entry of the used ring as laid out in guest memory; written by
 * vring_used_ring_id() and vring_used_ring_len().
 */
typedef struct VRingUsedElem {
    uint32_t id;    /* virtqueue_fill() stores the element's index here */
    uint32_t len;   /* virtqueue_fill() stores its len argument here */
} VRingUsedElem;
52 
53 typedef struct VRingUsed
54 {
55     uint16_t flags;
56     uint16_t idx;
57     VRingUsedElem ring[0];
58 } VRingUsed;
59 
60 typedef struct VRing
61 {
62     unsigned int num;
63     unsigned int align;
64     hwaddr desc;
65     hwaddr avail;
66     hwaddr used;
67 } VRing;
68 
69 struct VirtQueue
70 {
71     VRing vring;
72     hwaddr pa;
73     uint16_t last_avail_idx;
74     /* Last used index value we have signalled on */
75     uint16_t signalled_used;
76 
77     /* Last used index value we have signalled on */
78     bool signalled_used_valid;
79 
80     /* Notification enabled? */
81     bool notification;
82 
83     uint16_t queue_index;
84 
85     int inuse;
86 
87     uint16_t vector;
88     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
89     VirtIODevice *vdev;
90     EventNotifier guest_notifier;
91     EventNotifier host_notifier;
92 };
93 
94 /* virt queue functions */
95 static void virtqueue_init(VirtQueue *vq)
96 {
97     hwaddr pa = vq->pa;
98 
99     vq->vring.desc = pa;
100     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
101     vq->vring.used = vring_align(vq->vring.avail +
102                                  offsetof(VRingAvail, ring[vq->vring.num]),
103                                  vq->vring.align);
104 }
105 
106 static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
107                                        int i)
108 {
109     hwaddr pa;
110     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
111     return virtio_ldq_phys(vdev, pa);
112 }
113 
114 static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
115 {
116     hwaddr pa;
117     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
118     return virtio_ldl_phys(vdev, pa);
119 }
120 
121 static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
122                                         int i)
123 {
124     hwaddr pa;
125     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
126     return virtio_lduw_phys(vdev, pa);
127 }
128 
129 static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
130                                        int i)
131 {
132     hwaddr pa;
133     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
134     return virtio_lduw_phys(vdev, pa);
135 }
136 
137 static inline uint16_t vring_avail_flags(VirtQueue *vq)
138 {
139     hwaddr pa;
140     pa = vq->vring.avail + offsetof(VRingAvail, flags);
141     return virtio_lduw_phys(vq->vdev, pa);
142 }
143 
144 static inline uint16_t vring_avail_idx(VirtQueue *vq)
145 {
146     hwaddr pa;
147     pa = vq->vring.avail + offsetof(VRingAvail, idx);
148     return virtio_lduw_phys(vq->vdev, pa);
149 }
150 
151 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
152 {
153     hwaddr pa;
154     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
155     return virtio_lduw_phys(vq->vdev, pa);
156 }
157 
158 static inline uint16_t vring_used_event(VirtQueue *vq)
159 {
160     return vring_avail_ring(vq, vq->vring.num);
161 }
162 
163 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
164 {
165     hwaddr pa;
166     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
167     virtio_stl_phys(vq->vdev, pa, val);
168 }
169 
170 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
171 {
172     hwaddr pa;
173     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
174     virtio_stl_phys(vq->vdev, pa, val);
175 }
176 
177 static uint16_t vring_used_idx(VirtQueue *vq)
178 {
179     hwaddr pa;
180     pa = vq->vring.used + offsetof(VRingUsed, idx);
181     return virtio_lduw_phys(vq->vdev, pa);
182 }
183 
184 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
185 {
186     hwaddr pa;
187     pa = vq->vring.used + offsetof(VRingUsed, idx);
188     virtio_stw_phys(vq->vdev, pa, val);
189 }
190 
191 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
192 {
193     VirtIODevice *vdev = vq->vdev;
194     hwaddr pa;
195     pa = vq->vring.used + offsetof(VRingUsed, flags);
196     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
197 }
198 
199 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
200 {
201     VirtIODevice *vdev = vq->vdev;
202     hwaddr pa;
203     pa = vq->vring.used + offsetof(VRingUsed, flags);
204     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
205 }
206 
207 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
208 {
209     hwaddr pa;
210     if (!vq->notification) {
211         return;
212     }
213     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
214     virtio_stw_phys(vq->vdev, pa, val);
215 }
216 
217 void virtio_queue_set_notification(VirtQueue *vq, int enable)
218 {
219     vq->notification = enable;
220     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
221         vring_avail_event(vq, vring_avail_idx(vq));
222     } else if (enable) {
223         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
224     } else {
225         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
226     }
227     if (enable) {
228         /* Expose avail event/used flags before caller checks the avail idx. */
229         smp_mb();
230     }
231 }
232 
233 int virtio_queue_ready(VirtQueue *vq)
234 {
235     return vq->vring.avail != 0;
236 }
237 
238 int virtio_queue_empty(VirtQueue *vq)
239 {
240     return vring_avail_idx(vq) == vq->last_avail_idx;
241 }
242 
243 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
244                     unsigned int len, unsigned int idx)
245 {
246     unsigned int offset;
247     int i;
248 
249     trace_virtqueue_fill(vq, elem, len, idx);
250 
251     offset = 0;
252     for (i = 0; i < elem->in_num; i++) {
253         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
254 
255         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
256                                   elem->in_sg[i].iov_len,
257                                   1, size);
258 
259         offset += size;
260     }
261 
262     for (i = 0; i < elem->out_num; i++)
263         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
264                                   elem->out_sg[i].iov_len,
265                                   0, elem->out_sg[i].iov_len);
266 
267     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
268 
269     /* Get a pointer to the next entry in the used ring. */
270     vring_used_ring_id(vq, idx, elem->index);
271     vring_used_ring_len(vq, idx, len);
272 }
273 
274 void virtqueue_flush(VirtQueue *vq, unsigned int count)
275 {
276     uint16_t old, new;
277     /* Make sure buffer is written before we update index. */
278     smp_wmb();
279     trace_virtqueue_flush(vq, count);
280     old = vring_used_idx(vq);
281     new = old + count;
282     vring_used_idx_set(vq, new);
283     vq->inuse -= count;
284     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
285         vq->signalled_used_valid = false;
286 }
287 
288 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
289                     unsigned int len)
290 {
291     virtqueue_fill(vq, elem, len, 0);
292     virtqueue_flush(vq, 1);
293 }
294 
295 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
296 {
297     uint16_t num_heads = vring_avail_idx(vq) - idx;
298 
299     /* Check it isn't doing very strange things with descriptor numbers. */
300     if (num_heads > vq->vring.num) {
301         error_report("Guest moved used index from %u to %u",
302                      idx, vring_avail_idx(vq));
303         exit(1);
304     }
305     /* On success, callers read a descriptor at vq->last_avail_idx.
306      * Make sure descriptor read does not bypass avail index read. */
307     if (num_heads) {
308         smp_rmb();
309     }
310 
311     return num_heads;
312 }
313 
314 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
315 {
316     unsigned int head;
317 
318     /* Grab the next descriptor number they're advertising, and increment
319      * the index we've seen. */
320     head = vring_avail_ring(vq, idx % vq->vring.num);
321 
322     /* If their number is silly, that's a fatal mistake. */
323     if (head >= vq->vring.num) {
324         error_report("Guest says index %u is available", head);
325         exit(1);
326     }
327 
328     return head;
329 }
330 
331 static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
332                                     unsigned int i, unsigned int max)
333 {
334     unsigned int next;
335 
336     /* If this descriptor says it doesn't chain, we're done. */
337     if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) {
338         return max;
339     }
340 
341     /* Check they're not leading us off end of descriptors. */
342     next = vring_desc_next(vdev, desc_pa, i);
343     /* Make sure compiler knows to grab that: we don't want it changing! */
344     smp_wmb();
345 
346     if (next >= max) {
347         error_report("Desc next is %u", next);
348         exit(1);
349     }
350 
351     return next;
352 }
353 
/*
 * Walk the descriptor chains the guest has made available but that have
 * not been popped yet, summing the lengths of device-writable (in) and
 * device-readable (out) descriptors.
 *
 * @in_bytes:      if non-NULL, receives the in total
 * @out_bytes:     if non-NULL, receives the out total
 * @max_in_bytes:  the walk stops early once the in total reaches this
 *                 value and the out total reaches @max_out_bytes
 * @max_out_bytes: see @max_in_bytes
 *
 * vq->last_avail_idx is not modified.  Malformed rings make QEMU exit.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    /* Work on a private copy of the index so nothing is consumed. */
    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        /* Direct chains keep counting from the buffers seen so far. */
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            /* The table's entry count becomes the chain length limit. */
            max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(vdev, desc_pa, i);
            /* Restart counting and walking at entry 0 of the table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(vdev, desc_pa, i);
            } else {
                out_total += vring_desc_len(vdev, desc_pa, i);
            }
            /* Both limits reached: no need to look any further. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

        /* An indirect chain counts as a single buffer of the main ring. */
        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
424 
425 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
426                           unsigned int out_bytes)
427 {
428     unsigned int in_total, out_total;
429 
430     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
431     return in_bytes <= in_total && out_bytes <= out_total;
432 }
433 
434 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
435     size_t num_sg, int is_write)
436 {
437     unsigned int i;
438     hwaddr len;
439 
440     if (num_sg > VIRTQUEUE_MAX_SIZE) {
441         error_report("virtio: map attempt out of bounds: %zd > %d",
442                      num_sg, VIRTQUEUE_MAX_SIZE);
443         exit(1);
444     }
445 
446     for (i = 0; i < num_sg; i++) {
447         len = sg[i].iov_len;
448         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
449         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
450             error_report("virtio: error trying to map MMIO memory");
451             exit(1);
452         }
453     }
454 }
455 
/*
 * Take the next available descriptor chain off vq and describe it in elem.
 *
 * Device-writable descriptors are collected into elem->in_sg/in_addr and
 * the others into elem->out_sg/out_addr; all of them are then mapped with
 * virtqueue_map_sg().  elem->index is set to the head descriptor index.
 *
 * Returns 0 when nothing is available, otherwise the total number of
 * descriptors collected (in_num + out_num).  Malformed rings make QEMU
 * exit.
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Consume one avail entry: last_avail_idx advances here. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(vdev, desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
            /* This check runs for direct chains too, despite the message. */
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            /* This check runs for direct chains too, despite the message. */
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is recorded now; iov_base is set when mapping. */
        sg->iov_len = vring_desc_len(vdev, desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    /* Balanced by the decrement in virtqueue_flush(). */
    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
527 
528 /* virtio device */
529 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
530 {
531     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
532     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
533 
534     if (k->notify) {
535         k->notify(qbus->parent, vector);
536     }
537 }
538 
539 void virtio_update_irq(VirtIODevice *vdev)
540 {
541     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
542 }
543 
544 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
545 {
546     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
547     trace_virtio_set_status(vdev, val);
548 
549     if (k->set_status) {
550         k->set_status(vdev, val);
551     }
552     vdev->status = val;
553 }
554 
555 bool target_words_bigendian(void);
556 static enum virtio_device_endian virtio_default_endian(void)
557 {
558     if (target_words_bigendian()) {
559         return VIRTIO_DEVICE_ENDIAN_BIG;
560     } else {
561         return VIRTIO_DEVICE_ENDIAN_LITTLE;
562     }
563 }
564 
565 static enum virtio_device_endian virtio_current_cpu_endian(void)
566 {
567     CPUClass *cc = CPU_GET_CLASS(current_cpu);
568 
569     if (cc->virtio_is_big_endian(current_cpu)) {
570         return VIRTIO_DEVICE_ENDIAN_BIG;
571     } else {
572         return VIRTIO_DEVICE_ENDIAN_LITTLE;
573     }
574 }
575 
576 void virtio_reset(void *opaque)
577 {
578     VirtIODevice *vdev = opaque;
579     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
580     int i;
581 
582     virtio_set_status(vdev, 0);
583     if (current_cpu) {
584         /* Guest initiated reset */
585         vdev->device_endian = virtio_current_cpu_endian();
586     } else {
587         /* System reset */
588         vdev->device_endian = virtio_default_endian();
589     }
590 
591     if (k->reset) {
592         k->reset(vdev);
593     }
594 
595     vdev->guest_features = 0;
596     vdev->queue_sel = 0;
597     vdev->status = 0;
598     vdev->isr = 0;
599     vdev->config_vector = VIRTIO_NO_VECTOR;
600     virtio_notify_vector(vdev, vdev->config_vector);
601 
602     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
603         vdev->vq[i].vring.desc = 0;
604         vdev->vq[i].vring.avail = 0;
605         vdev->vq[i].vring.used = 0;
606         vdev->vq[i].last_avail_idx = 0;
607         vdev->vq[i].pa = 0;
608         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
609         vdev->vq[i].signalled_used = 0;
610         vdev->vq[i].signalled_used_valid = false;
611         vdev->vq[i].notification = true;
612     }
613 }
614 
615 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
616 {
617     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618     uint8_t val;
619 
620     if (addr + sizeof(val) > vdev->config_len) {
621         return (uint32_t)-1;
622     }
623 
624     k->get_config(vdev, vdev->config);
625 
626     val = ldub_p(vdev->config + addr);
627     return val;
628 }
629 
630 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
631 {
632     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
633     uint16_t val;
634 
635     if (addr + sizeof(val) > vdev->config_len) {
636         return (uint32_t)-1;
637     }
638 
639     k->get_config(vdev, vdev->config);
640 
641     val = lduw_p(vdev->config + addr);
642     return val;
643 }
644 
645 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
646 {
647     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
648     uint32_t val;
649 
650     if (addr + sizeof(val) > vdev->config_len) {
651         return (uint32_t)-1;
652     }
653 
654     k->get_config(vdev, vdev->config);
655 
656     val = ldl_p(vdev->config + addr);
657     return val;
658 }
659 
660 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
661 {
662     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
663     uint8_t val = data;
664 
665     if (addr + sizeof(val) > vdev->config_len) {
666         return;
667     }
668 
669     stb_p(vdev->config + addr, val);
670 
671     if (k->set_config) {
672         k->set_config(vdev, vdev->config);
673     }
674 }
675 
676 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
677 {
678     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
679     uint16_t val = data;
680 
681     if (addr + sizeof(val) > vdev->config_len) {
682         return;
683     }
684 
685     stw_p(vdev->config + addr, val);
686 
687     if (k->set_config) {
688         k->set_config(vdev, vdev->config);
689     }
690 }
691 
692 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
693 {
694     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
695     uint32_t val = data;
696 
697     if (addr + sizeof(val) > vdev->config_len) {
698         return;
699     }
700 
701     stl_p(vdev->config + addr, val);
702 
703     if (k->set_config) {
704         k->set_config(vdev, vdev->config);
705     }
706 }
707 
708 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
709 {
710     vdev->vq[n].pa = addr;
711     virtqueue_init(&vdev->vq[n]);
712 }
713 
714 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
715 {
716     return vdev->vq[n].pa;
717 }
718 
719 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
720 {
721     /* Don't allow guest to flip queue between existent and
722      * nonexistent states, or to set it to an invalid size.
723      */
724     if (!!num != !!vdev->vq[n].vring.num ||
725         num > VIRTQUEUE_MAX_SIZE ||
726         num < 0) {
727         return;
728     }
729     vdev->vq[n].vring.num = num;
730     virtqueue_init(&vdev->vq[n]);
731 }
732 
733 int virtio_queue_get_num(VirtIODevice *vdev, int n)
734 {
735     return vdev->vq[n].vring.num;
736 }
737 
738 int virtio_queue_get_id(VirtQueue *vq)
739 {
740     VirtIODevice *vdev = vq->vdev;
741     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
742     return vq - &vdev->vq[0];
743 }
744 
745 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
746 {
747     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
748     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
749 
750     /* Check that the transport told us it was going to do this
751      * (so a buggy transport will immediately assert rather than
752      * silently failing to migrate this state)
753      */
754     assert(k->has_variable_vring_alignment);
755 
756     vdev->vq[n].vring.align = align;
757     virtqueue_init(&vdev->vq[n]);
758 }
759 
760 void virtio_queue_notify_vq(VirtQueue *vq)
761 {
762     if (vq->vring.desc) {
763         VirtIODevice *vdev = vq->vdev;
764         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
765         vq->handle_output(vdev, vq);
766     }
767 }
768 
769 void virtio_queue_notify(VirtIODevice *vdev, int n)
770 {
771     virtio_queue_notify_vq(&vdev->vq[n]);
772 }
773 
774 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
775 {
776     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
777         VIRTIO_NO_VECTOR;
778 }
779 
780 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
781 {
782     if (n < VIRTIO_PCI_QUEUE_MAX)
783         vdev->vq[n].vector = vector;
784 }
785 
786 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
787                             void (*handle_output)(VirtIODevice *, VirtQueue *))
788 {
789     int i;
790 
791     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
792         if (vdev->vq[i].vring.num == 0)
793             break;
794     }
795 
796     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
797         abort();
798 
799     vdev->vq[i].vring.num = queue_size;
800     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
801     vdev->vq[i].handle_output = handle_output;
802 
803     return &vdev->vq[i];
804 }
805 
806 void virtio_del_queue(VirtIODevice *vdev, int n)
807 {
808     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
809         abort();
810     }
811 
812     vdev->vq[n].vring.num = 0;
813 }
814 
815 void virtio_irq(VirtQueue *vq)
816 {
817     trace_virtio_irq(vq);
818     vq->vdev->isr |= 0x01;
819     virtio_notify_vector(vq->vdev, vq->vector);
820 }
821 
/* Assuming a given event index value from the other side, if
 * we have just advanced the index from old to new,
 * should we trigger an event?
 *
 * All arithmetic is done modulo 2^16 so that index wrap-around is
 * handled.  Returns 1 when an event is needed, 0 otherwise. */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = new - event - 1;
    const uint16_t advanced = new - old;

    return past_event < advanced;
}
834 
835 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
836 {
837     uint16_t old, new;
838     bool v;
839     /* We need to expose used array entries before checking used event. */
840     smp_mb();
841     /* Always notify when queue is empty (when feature acknowledge) */
842     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
843          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
844         return true;
845     }
846 
847     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
848         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
849     }
850 
851     v = vq->signalled_used_valid;
852     vq->signalled_used_valid = true;
853     old = vq->signalled_used;
854     new = vq->signalled_used = vring_used_idx(vq);
855     return !v || vring_need_event(vring_used_event(vq), new, old);
856 }
857 
858 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
859 {
860     if (!vring_notify(vdev, vq)) {
861         return;
862     }
863 
864     trace_virtio_notify(vdev, vq);
865     vdev->isr |= 0x01;
866     virtio_notify_vector(vdev, vq->vector);
867 }
868 
869 void virtio_notify_config(VirtIODevice *vdev)
870 {
871     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
872         return;
873 
874     vdev->isr |= 0x03;
875     virtio_notify_vector(vdev, vdev->config_vector);
876 }
877 
878 static bool virtio_device_endian_needed(void *opaque)
879 {
880     VirtIODevice *vdev = opaque;
881 
882     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
883     return vdev->device_endian != virtio_default_endian();
884 }
885 
886 static const VMStateDescription vmstate_virtio_device_endian = {
887     .name = "virtio/device_endian",
888     .version_id = 1,
889     .minimum_version_id = 1,
890     .fields = (VMStateField[]) {
891         VMSTATE_UINT8(device_endian, VirtIODevice),
892         VMSTATE_END_OF_LIST()
893     }
894 };
895 
896 static const VMStateDescription vmstate_virtio = {
897     .name = "virtio",
898     .version_id = 1,
899     .minimum_version_id = 1,
900     .minimum_version_id_old = 1,
901     .fields = (VMStateField[]) {
902         VMSTATE_END_OF_LIST()
903     },
904     .subsections = (VMStateSubsection[]) {
905         {
906             .vmsd = &vmstate_virtio_device_endian,
907             .needed = &virtio_device_endian_needed
908         },
909         { 0 }
910     }
911 };
912 
913 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
914 {
915     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
916     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
917     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
918     int i;
919 
920     if (k->save_config) {
921         k->save_config(qbus->parent, f);
922     }
923 
924     qemu_put_8s(f, &vdev->status);
925     qemu_put_8s(f, &vdev->isr);
926     qemu_put_be16s(f, &vdev->queue_sel);
927     qemu_put_be32s(f, &vdev->guest_features);
928     qemu_put_be32(f, vdev->config_len);
929     qemu_put_buffer(f, vdev->config, vdev->config_len);
930 
931     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
932         if (vdev->vq[i].vring.num == 0)
933             break;
934     }
935 
936     qemu_put_be32(f, i);
937 
938     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
939         if (vdev->vq[i].vring.num == 0)
940             break;
941 
942         qemu_put_be32(f, vdev->vq[i].vring.num);
943         if (k->has_variable_vring_alignment) {
944             qemu_put_be32(f, vdev->vq[i].vring.align);
945         }
946         qemu_put_be64(f, vdev->vq[i].pa);
947         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
948         if (k->save_queue) {
949             k->save_queue(qbus->parent, i, f);
950         }
951     }
952 
953     if (vdc->save != NULL) {
954         vdc->save(vdev, f);
955     }
956 
957     /* Subsections */
958     vmstate_save_state(f, &vmstate_virtio, vdev);
959 }
960 
961 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
962 {
963     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
964     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
965     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
966     uint32_t supported_features = vbusk->get_features(qbus->parent);
967     bool bad = (val & ~supported_features) != 0;
968 
969     val &= supported_features;
970     if (k->set_features) {
971         k->set_features(vdev, val);
972     }
973     vdev->guest_features = val;
974     return bad ? -1 : 0;
975 }
976 
977 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
978 {
979     int i, ret;
980     int32_t config_len;
981     uint32_t num;
982     uint32_t features;
983     uint32_t supported_features;
984     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
985     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
986     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
987 
988     /*
989      * We poison the endianness to ensure it does not get used before
990      * subsections have been loaded.
991      */
992     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
993 
994     if (k->load_config) {
995         ret = k->load_config(qbus->parent, f);
996         if (ret)
997             return ret;
998     }
999 
1000     qemu_get_8s(f, &vdev->status);
1001     qemu_get_8s(f, &vdev->isr);
1002     qemu_get_be16s(f, &vdev->queue_sel);
1003     if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
1004         return -1;
1005     }
1006     qemu_get_be32s(f, &features);
1007 
1008     if (virtio_set_features(vdev, features) < 0) {
1009         supported_features = k->get_features(qbus->parent);
1010         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
1011                      features, supported_features);
1012         return -1;
1013     }
1014     config_len = qemu_get_be32(f);
1015 
1016     /*
1017      * There are cases where the incoming config can be bigger or smaller
1018      * than what we have; so load what we have space for, and skip
1019      * any excess that's in the stream.
1020      */
1021     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1022 
1023     while (config_len > vdev->config_len) {
1024         qemu_get_byte(f);
1025         config_len--;
1026     }
1027 
1028     num = qemu_get_be32(f);
1029 
1030     if (num > VIRTIO_PCI_QUEUE_MAX) {
1031         error_report("Invalid number of PCI queues: 0x%x", num);
1032         return -1;
1033     }
1034 
1035     for (i = 0; i < num; i++) {
1036         vdev->vq[i].vring.num = qemu_get_be32(f);
1037         if (k->has_variable_vring_alignment) {
1038             vdev->vq[i].vring.align = qemu_get_be32(f);
1039         }
1040         vdev->vq[i].pa = qemu_get_be64(f);
1041         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1042         vdev->vq[i].signalled_used_valid = false;
1043         vdev->vq[i].notification = true;
1044 
1045         if (vdev->vq[i].pa) {
1046             virtqueue_init(&vdev->vq[i]);
1047         } else if (vdev->vq[i].last_avail_idx) {
1048             error_report("VQ %d address 0x0 "
1049                          "inconsistent with Host index 0x%x",
1050                          i, vdev->vq[i].last_avail_idx);
1051                 return -1;
1052 	}
1053         if (k->load_queue) {
1054             ret = k->load_queue(qbus->parent, i, f);
1055             if (ret)
1056                 return ret;
1057         }
1058     }
1059 
1060     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1061 
1062     if (vdc->load != NULL) {
1063         ret = vdc->load(vdev, f, version_id);
1064         if (ret) {
1065             return ret;
1066         }
1067     }
1068 
1069     /* Subsections */
1070     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1071     if (ret) {
1072         return ret;
1073     }
1074 
1075     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1076         vdev->device_endian = virtio_default_endian();
1077     }
1078 
1079     for (i = 0; i < num; i++) {
1080         if (vdev->vq[i].pa) {
1081             uint16_t nheads;
1082             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1083             /* Check it isn't doing strange things with descriptor numbers. */
1084             if (nheads > vdev->vq[i].vring.num) {
1085                 error_report("VQ %d size 0x%x Guest index 0x%x "
1086                              "inconsistent with Host index 0x%x: delta 0x%x",
1087                              i, vdev->vq[i].vring.num,
1088                              vring_avail_idx(&vdev->vq[i]),
1089                              vdev->vq[i].last_avail_idx, nheads);
1090                 return -1;
1091             }
1092         }
1093     }
1094 
1095     return 0;
1096 }
1097 
1098 void virtio_cleanup(VirtIODevice *vdev)
1099 {
1100     qemu_del_vm_change_state_handler(vdev->vmstate);
1101     g_free(vdev->config);
1102     g_free(vdev->vq);
1103 }
1104 
1105 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1106 {
1107     VirtIODevice *vdev = opaque;
1108     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1109     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1110     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1111     vdev->vm_running = running;
1112 
1113     if (backend_run) {
1114         virtio_set_status(vdev, vdev->status);
1115     }
1116 
1117     if (k->vmstate_change) {
1118         k->vmstate_change(qbus->parent, backend_run);
1119     }
1120 
1121     if (!backend_run) {
1122         virtio_set_status(vdev, vdev->status);
1123     }
1124 }
1125 
1126 void virtio_init(VirtIODevice *vdev, const char *name,
1127                  uint16_t device_id, size_t config_size)
1128 {
1129     int i;
1130     vdev->device_id = device_id;
1131     vdev->status = 0;
1132     vdev->isr = 0;
1133     vdev->queue_sel = 0;
1134     vdev->config_vector = VIRTIO_NO_VECTOR;
1135     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
1136     vdev->vm_running = runstate_is_running();
1137     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
1138         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1139         vdev->vq[i].vdev = vdev;
1140         vdev->vq[i].queue_index = i;
1141     }
1142 
1143     vdev->name = name;
1144     vdev->config_len = config_size;
1145     if (vdev->config_len) {
1146         vdev->config = g_malloc0(config_size);
1147     } else {
1148         vdev->config = NULL;
1149     }
1150     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1151                                                      vdev);
1152     vdev->device_endian = virtio_default_endian();
1153 }
1154 
1155 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1156 {
1157     return vdev->vq[n].vring.desc;
1158 }
1159 
1160 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1161 {
1162     return vdev->vq[n].vring.avail;
1163 }
1164 
1165 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1166 {
1167     return vdev->vq[n].vring.used;
1168 }
1169 
1170 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1171 {
1172     return vdev->vq[n].vring.desc;
1173 }
1174 
1175 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1176 {
1177     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1178 }
1179 
1180 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1181 {
1182     return offsetof(VRingAvail, ring) +
1183         sizeof(uint64_t) * vdev->vq[n].vring.num;
1184 }
1185 
1186 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1187 {
1188     return offsetof(VRingUsed, ring) +
1189         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1190 }
1191 
1192 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1193 {
1194     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1195 	    virtio_queue_get_used_size(vdev, n);
1196 }
1197 
1198 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1199 {
1200     return vdev->vq[n].last_avail_idx;
1201 }
1202 
1203 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1204 {
1205     vdev->vq[n].last_avail_idx = idx;
1206 }
1207 
1208 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1209 {
1210     vdev->vq[n].signalled_used_valid = false;
1211 }
1212 
1213 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1214 {
1215     return vdev->vq + n;
1216 }
1217 
1218 uint16_t virtio_get_queue_index(VirtQueue *vq)
1219 {
1220     return vq->queue_index;
1221 }
1222 
1223 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1224 {
1225     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1226     if (event_notifier_test_and_clear(n)) {
1227         virtio_irq(vq);
1228     }
1229 }
1230 
1231 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1232                                                 bool with_irqfd)
1233 {
1234     if (assign && !with_irqfd) {
1235         event_notifier_set_handler(&vq->guest_notifier,
1236                                    virtio_queue_guest_notifier_read);
1237     } else {
1238         event_notifier_set_handler(&vq->guest_notifier, NULL);
1239     }
1240     if (!assign) {
1241         /* Test and clear notifier before closing it,
1242          * in case poll callback didn't have time to run. */
1243         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1244     }
1245 }
1246 
1247 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1248 {
1249     return &vq->guest_notifier;
1250 }
1251 
1252 static void virtio_queue_host_notifier_read(EventNotifier *n)
1253 {
1254     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1255     if (event_notifier_test_and_clear(n)) {
1256         virtio_queue_notify_vq(vq);
1257     }
1258 }
1259 
1260 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1261                                                bool set_handler)
1262 {
1263     if (assign && set_handler) {
1264         event_notifier_set_handler(&vq->host_notifier,
1265                                    virtio_queue_host_notifier_read);
1266     } else {
1267         event_notifier_set_handler(&vq->host_notifier, NULL);
1268     }
1269     if (!assign) {
1270         /* Test and clear notifier before after disabling event,
1271          * in case poll callback didn't have time to run. */
1272         virtio_queue_host_notifier_read(&vq->host_notifier);
1273     }
1274 }
1275 
1276 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1277 {
1278     return &vq->host_notifier;
1279 }
1280 
1281 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1282 {
1283     g_free(vdev->bus_name);
1284     vdev->bus_name = g_strdup(bus_name);
1285 }
1286 
1287 static void virtio_device_realize(DeviceState *dev, Error **errp)
1288 {
1289     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1290     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1291     Error *err = NULL;
1292 
1293     if (vdc->realize != NULL) {
1294         vdc->realize(dev, &err);
1295         if (err != NULL) {
1296             error_propagate(errp, err);
1297             return;
1298         }
1299     }
1300     virtio_bus_device_plugged(vdev);
1301 }
1302 
1303 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1304 {
1305     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1306     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
1307     Error *err = NULL;
1308 
1309     virtio_bus_device_unplugged(vdev);
1310 
1311     if (vdc->unrealize != NULL) {
1312         vdc->unrealize(dev, &err);
1313         if (err != NULL) {
1314             error_propagate(errp, err);
1315             return;
1316         }
1317     }
1318 
1319     g_free(vdev->bus_name);
1320     vdev->bus_name = NULL;
1321 }
1322 
1323 static void virtio_device_class_init(ObjectClass *klass, void *data)
1324 {
1325     /* Set the default value here. */
1326     DeviceClass *dc = DEVICE_CLASS(klass);
1327 
1328     dc->realize = virtio_device_realize;
1329     dc->unrealize = virtio_device_unrealize;
1330     dc->bus_type = TYPE_VIRTIO_BUS;
1331 }
1332 
1333 static const TypeInfo virtio_device_info = {
1334     .name = TYPE_VIRTIO_DEVICE,
1335     .parent = TYPE_DEVICE,
1336     .instance_size = sizeof(VirtIODevice),
1337     .class_init = virtio_device_class_init,
1338     .abstract = true,
1339     .class_size = sizeof(VirtioDeviceClass),
1340 };
1341 
1342 static void virtio_register_types(void)
1343 {
1344     type_register_static(&virtio_device_info);
1345 }
1346 
1347 type_init(virtio_register_types)
1348