xref: /openbmc/qemu/hw/virtio/virtio.c (revision 88f62c2b)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include <inttypes.h>
15 
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "hw/virtio/virtio.h"
19 #include "qemu/atomic.h"
20 #include "hw/virtio/virtio-bus.h"
21 
22 /* The alignment to use between consumer and producer parts of vring.
23  * x86 pagesize. */
24 #define VIRTIO_PCI_VRING_ALIGN         4096
25 
26 typedef struct VRingDesc
27 {
28     uint64_t addr;
29     uint32_t len;
30     uint16_t flags;
31     uint16_t next;
32 } VRingDesc;
33 
34 typedef struct VRingAvail
35 {
36     uint16_t flags;
37     uint16_t idx;
38     uint16_t ring[0];
39 } VRingAvail;
40 
41 typedef struct VRingUsedElem
42 {
43     uint32_t id;
44     uint32_t len;
45 } VRingUsedElem;
46 
47 typedef struct VRingUsed
48 {
49     uint16_t flags;
50     uint16_t idx;
51     VRingUsedElem ring[0];
52 } VRingUsed;
53 
54 typedef struct VRing
55 {
56     unsigned int num;
57     hwaddr desc;
58     hwaddr avail;
59     hwaddr used;
60 } VRing;
61 
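/*
 * Editor's note: the three areas of the split vring sit back to back in
 * guest memory, in the layout virtqueue_init() computes below:
 *
 *   desc:  num * sizeof(VRingDesc)           (16 bytes per descriptor)
 *   avail: 4 + num * 2 bytes                 (flags, idx, ring[num])
 *   used:  4 + num * 8 bytes                 (flags, idx, ring[num]),
 *          starting at the next VIRTIO_PCI_VRING_ALIGN boundary
 */
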
62 struct VirtQueue
63 {
64     VRing vring;
65     hwaddr pa;
66     uint16_t last_avail_idx;
67     /* Last used index value we have signalled on */
68     uint16_t signalled_used;
69 
70     /* Whether signalled_used is valid. */
71     bool signalled_used_valid;
72 
73     /* Notification enabled? */
74     bool notification;
75 
76     uint16_t queue_index;
77 
78     int inuse;
79 
80     uint16_t vector;
81     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
82     VirtIODevice *vdev;
83     EventNotifier guest_notifier;
84     EventNotifier host_notifier;
85 };
86 
87 /* virt queue functions */
88 static void virtqueue_init(VirtQueue *vq)
89 {
90     hwaddr pa = vq->pa;
91 
92     vq->vring.desc = pa;
93     vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
94     vq->vring.used = vring_align(vq->vring.avail +
95                                  offsetof(VRingAvail, ring[vq->vring.num]),
96                                  VIRTIO_PCI_VRING_ALIGN);
97 }
98 
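/*
 * Editor's worked example (hypothetical numbers): for a 256-entry queue
 * whose ring starts at guest physical address 0x10000:
 *
 *   desc  = 0x10000                            (256 * 16 = 4096 bytes)
 *   avail = 0x10000 + 4096 = 0x11000           (4 + 256 * 2 = 516 bytes)
 *   used  = align(0x11000 + 516, 4096) = 0x12000
 */
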
99 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
100 {
101     hwaddr pa;
102     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
103     return ldq_phys(pa);
104 }
105 
106 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
107 {
108     hwaddr pa;
109     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
110     return ldl_phys(pa);
111 }
112 
113 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
114 {
115     hwaddr pa;
116     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
117     return lduw_phys(pa);
118 }
119 
120 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
121 {
122     hwaddr pa;
123     pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
124     return lduw_phys(pa);
125 }
126 
127 static inline uint16_t vring_avail_flags(VirtQueue *vq)
128 {
129     hwaddr pa;
130     pa = vq->vring.avail + offsetof(VRingAvail, flags);
131     return lduw_phys(pa);
132 }
133 
134 static inline uint16_t vring_avail_idx(VirtQueue *vq)
135 {
136     hwaddr pa;
137     pa = vq->vring.avail + offsetof(VRingAvail, idx);
138     return lduw_phys(pa);
139 }
140 
141 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
142 {
143     hwaddr pa;
144     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
145     return lduw_phys(pa);
146 }
147 
148 static inline uint16_t vring_used_event(VirtQueue *vq)
149 {
150     return vring_avail_ring(vq, vq->vring.num);
151 }
152 
153 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
154 {
155     hwaddr pa;
156     pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
157     stl_phys(pa, val);
158 }
159 
160 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
161 {
162     hwaddr pa;
163     pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
164     stl_phys(pa, val);
165 }
166 
167 static uint16_t vring_used_idx(VirtQueue *vq)
168 {
169     hwaddr pa;
170     pa = vq->vring.used + offsetof(VRingUsed, idx);
171     return lduw_phys(pa);
172 }
173 
174 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
175 {
176     hwaddr pa;
177     pa = vq->vring.used + offsetof(VRingUsed, idx);
178     stw_phys(pa, val);
179 }
180 
181 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
182 {
183     hwaddr pa;
184     pa = vq->vring.used + offsetof(VRingUsed, flags);
185     stw_phys(pa, lduw_phys(pa) | mask);
186 }
187 
188 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
189 {
190     hwaddr pa;
191     pa = vq->vring.used + offsetof(VRingUsed, flags);
192     stw_phys(pa, lduw_phys(pa) & ~mask);
193 }
194 
195 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
196 {
197     hwaddr pa;
198     if (!vq->notification) {
199         return;
200     }
201     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
202     stw_phys(pa, val);
203 }
204 
205 void virtio_queue_set_notification(VirtQueue *vq, int enable)
206 {
207     vq->notification = enable;
208     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
209         vring_avail_event(vq, vring_avail_idx(vq));
210     } else if (enable) {
211         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
212     } else {
213         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
214     }
215     if (enable) {
216         /* Expose avail event/used flags before caller checks the avail idx. */
217         smp_mb();
218     }
219 }
220 
221 int virtio_queue_ready(VirtQueue *vq)
222 {
223     return vq->vring.avail != 0;
224 }
225 
226 int virtio_queue_empty(VirtQueue *vq)
227 {
228     return vring_avail_idx(vq) == vq->last_avail_idx;
229 }
230 
231 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
232                     unsigned int len, unsigned int idx)
233 {
234     unsigned int offset;
235     int i;
236 
237     trace_virtqueue_fill(vq, elem, len, idx);
238 
239     offset = 0;
240     for (i = 0; i < elem->in_num; i++) {
241         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
242 
243         cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
244                                   elem->in_sg[i].iov_len,
245                                   1, size);
246 
247         offset += size;
248     }
249 
250     for (i = 0; i < elem->out_num; i++)
251         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
252                                   elem->out_sg[i].iov_len,
253                                   0, elem->out_sg[i].iov_len);
254 
255     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
256 
257     /* Write this element into the next slot of the used ring. */
258     vring_used_ring_id(vq, idx, elem->index);
259     vring_used_ring_len(vq, idx, len);
260 }
261 
262 void virtqueue_flush(VirtQueue *vq, unsigned int count)
263 {
264     uint16_t old, new;
265     /* Make sure buffer is written before we update index. */
266     smp_wmb();
267     trace_virtqueue_flush(vq, count);
268     old = vring_used_idx(vq);
269     new = old + count;
270     vring_used_idx_set(vq, new);
271     vq->inuse -= count;
272     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
273         vq->signalled_used_valid = false;
274 }
275 
276 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
277                     unsigned int len)
278 {
279     virtqueue_fill(vq, elem, len, 0);
280     virtqueue_flush(vq, 1);
281 }
282 
283 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
284 {
285     uint16_t num_heads = vring_avail_idx(vq) - idx;
286 
287     /* Check it isn't doing very strange things with descriptor numbers. */
288     if (num_heads > vq->vring.num) {
289         error_report("Guest moved used index from %u to %u",
290                      idx, vring_avail_idx(vq));
291         exit(1);
292     }
293     /* On success, callers read a descriptor at vq->last_avail_idx.
294      * Make sure descriptor read does not bypass avail index read. */
295     if (num_heads) {
296         smp_rmb();
297     }
298 
299     return num_heads;
300 }
301 
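/*
 * Editor's note: the index arithmetic above is done modulo 2^16, so it
 * survives wrap: e.g. vring_avail_idx() == 3 with last_avail_idx == 65534
 * gives (uint16_t)(3 - 65534) == 5 pending heads.
 */
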
302 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
303 {
304     unsigned int head;
305 
306     /* Grab the next descriptor number they're advertising, and increment
307      * the index we've seen. */
308     head = vring_avail_ring(vq, idx % vq->vring.num);
309 
310     /* If their number is silly, that's a fatal mistake. */
311     if (head >= vq->vring.num) {
312         error_report("Guest says index %u is available", head);
313         exit(1);
314     }
315 
316     return head;
317 }
318 
319 static unsigned virtqueue_next_desc(hwaddr desc_pa,
320                                     unsigned int i, unsigned int max)
321 {
322     unsigned int next;
323 
324     /* If this descriptor says it doesn't chain, we're done. */
325     if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
326         return max;
327 
328     /* Check they're not leading us off end of descriptors. */
329     next = vring_desc_next(desc_pa, i);
330     /* Make sure compiler knows to grab that: we don't want it changing! */
331     smp_wmb();
332 
333     if (next >= max) {
334         error_report("Desc next is %u", next);
335         exit(1);
336     }
337 
338     return next;
339 }
340 
341 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
342                                unsigned int *out_bytes,
343                                unsigned max_in_bytes, unsigned max_out_bytes)
344 {
345     unsigned int idx;
346     unsigned int total_bufs, in_total, out_total;
347 
348     idx = vq->last_avail_idx;
349 
350     total_bufs = in_total = out_total = 0;
351     while (virtqueue_num_heads(vq, idx)) {
352         unsigned int max, num_bufs, indirect = 0;
353         hwaddr desc_pa;
354         int i;
355 
356         max = vq->vring.num;
357         num_bufs = total_bufs;
358         i = virtqueue_get_head(vq, idx++);
359         desc_pa = vq->vring.desc;
360 
361         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
362             if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
363                 error_report("Invalid size for indirect buffer table");
364                 exit(1);
365             }
366 
367             /* If we've got too many, that implies a descriptor loop. */
368             if (num_bufs >= max) {
369                 error_report("Looped descriptor");
370                 exit(1);
371             }
372 
373             /* loop over the indirect descriptor table */
374             indirect = 1;
375             max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
376             desc_pa = vring_desc_addr(desc_pa, i);
377             num_bufs = i = 0;
378         }
379 
380         do {
381             /* If we've got too many, that implies a descriptor loop. */
382             if (++num_bufs > max) {
383                 error_report("Looped descriptor");
384                 exit(1);
385             }
386 
387             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
388                 in_total += vring_desc_len(desc_pa, i);
389             } else {
390                 out_total += vring_desc_len(desc_pa, i);
391             }
392             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
393                 goto done;
394             }
395         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
396 
397         if (!indirect)
398             total_bufs = num_bufs;
399         else
400             total_bufs++;
401     }
402 done:
403     if (in_bytes) {
404         *in_bytes = in_total;
405     }
406     if (out_bytes) {
407         *out_bytes = out_total;
408     }
409 }
410 
411 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
412                           unsigned int out_bytes)
413 {
414     unsigned int in_total, out_total;
415 
416     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
417     return in_bytes <= in_total && out_bytes <= out_total;
418 }
419 
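/*
 * Editor's sketch of a typical caller (sizes are hypothetical): a device
 * that needs one 10-byte guest-writable buffer can bail out early:
 *
 *     if (!virtqueue_avail_bytes(vq, 10, 0)) {
 *         return;    /* guest has not posted enough buffer space yet */
 *     }
 */
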
420 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
421     size_t num_sg, int is_write)
422 {
423     unsigned int i;
424     hwaddr len;
425 
426     for (i = 0; i < num_sg; i++) {
427         len = sg[i].iov_len;
428         sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
429         if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
430             error_report("virtio: trying to map MMIO memory");
431             exit(1);
432         }
433     }
434 }
435 
436 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
437 {
438     unsigned int i, head, max;
439     hwaddr desc_pa = vq->vring.desc;
440 
441     if (!virtqueue_num_heads(vq, vq->last_avail_idx))
442         return 0;
443 
444     /* When we start there are no input or output buffers yet. */
445     elem->out_num = elem->in_num = 0;
446 
447     max = vq->vring.num;
448 
449     i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
450     if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
451         vring_avail_event(vq, vring_avail_idx(vq));
452     }
453 
454     if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
455         if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
456             error_report("Invalid size for indirect buffer table");
457             exit(1);
458         }
459 
460         /* loop over the indirect descriptor table */
461         max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
462         desc_pa = vring_desc_addr(desc_pa, i);
463         i = 0;
464     }
465 
466     /* Collect all the descriptors */
467     do {
468         struct iovec *sg;
469 
470         if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
471             if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
472                 error_report("Too many write descriptors in indirect table");
473                 exit(1);
474             }
475             elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
476             sg = &elem->in_sg[elem->in_num++];
477         } else {
478             if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
479                 error_report("Too many read descriptors in indirect table");
480                 exit(1);
481             }
482             elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
483             sg = &elem->out_sg[elem->out_num++];
484         }
485 
486         sg->iov_len = vring_desc_len(desc_pa, i);
487 
488         /* If we've got too many, that implies a descriptor loop. */
489         if ((elem->in_num + elem->out_num) > max) {
490             error_report("Looped descriptor");
491             exit(1);
492         }
493     } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
494 
495     /* Now map what we have collected */
496     virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
497     virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
498 
499     elem->index = head;
500 
501     vq->inuse++;
502 
503     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
504     return elem->in_num + elem->out_num;
505 }
506 
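/*
 * Editor's sketch: the pop/push/notify cycle a device's handle_output
 * callback typically drives, suppressing guest kicks while it polls.
 * Kept under #if 0; "example_handle_output" and the process() step are
 * hypothetical, everything else is this file's API.
 */
#if 0
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement elem;

    do {
        virtio_queue_set_notification(vq, 0);  /* stop further guest kicks */
        while (virtqueue_pop(vq, &elem)) {
            /* process(&elem): consume out_sg, fill in_sg as needed */
            virtqueue_push(vq, &elem, 0);      /* report 0 bytes written */
        }
        virtio_queue_set_notification(vq, 1);  /* re-enable, then re-check */
    } while (!virtio_queue_empty(vq));

    virtio_notify(vdev, vq);                   /* raise the guest interrupt */
}
#endif
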
507 /* virtio device */
508 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
509 {
510     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
511     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
512 
513     if (k->notify) {
514         k->notify(qbus->parent, vector);
515     }
516 }
517 
518 void virtio_update_irq(VirtIODevice *vdev)
519 {
520     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
521 }
522 
523 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
524 {
525     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
526     trace_virtio_set_status(vdev, val);
527 
528     if (k->set_status) {
529         k->set_status(vdev, val);
530     }
531     vdev->status = val;
532 }
533 
534 void virtio_reset(void *opaque)
535 {
536     VirtIODevice *vdev = opaque;
537     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
538     int i;
539 
540     virtio_set_status(vdev, 0);
541 
542     if (k->reset) {
543         k->reset(vdev);
544     }
545 
546     vdev->guest_features = 0;
547     vdev->queue_sel = 0;
548     vdev->status = 0;
549     vdev->isr = 0;
550     vdev->config_vector = VIRTIO_NO_VECTOR;
551     virtio_notify_vector(vdev, vdev->config_vector);
552 
553     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
554         vdev->vq[i].vring.desc = 0;
555         vdev->vq[i].vring.avail = 0;
556         vdev->vq[i].vring.used = 0;
557         vdev->vq[i].last_avail_idx = 0;
558         vdev->vq[i].pa = 0;
559         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
560         vdev->vq[i].signalled_used = 0;
561         vdev->vq[i].signalled_used_valid = false;
562         vdev->vq[i].notification = true;
563     }
564 }
565 
566 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
567 {
568     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
569     uint8_t val;
570 
571     if (addr + sizeof(val) > vdev->config_len) {
572         return (uint32_t)-1;
573     }
574 
575     k->get_config(vdev, vdev->config);
576 
577     val = ldub_p(vdev->config + addr);
578     return val;
579 }
580 
581 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
582 {
583     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
584     uint16_t val;
585 
586     if (addr + sizeof(val) > vdev->config_len) {
587         return (uint32_t)-1;
588     }
589 
590     k->get_config(vdev, vdev->config);
591 
592     val = lduw_p(vdev->config + addr);
593     return val;
594 }
595 
596 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
597 {
598     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
599     uint32_t val;
600 
601     if (addr + sizeof(val) > vdev->config_len) {
602         return (uint32_t)-1;
603     }
604 
605     k->get_config(vdev, vdev->config);
606 
607     val = ldl_p(vdev->config + addr);
608     return val;
609 }
610 
611 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
612 {
613     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
614     uint8_t val = data;
615 
616     if (addr + sizeof(val) > vdev->config_len) {
617         return;
618     }
619 
620     stb_p(vdev->config + addr, val);
621 
622     if (k->set_config) {
623         k->set_config(vdev, vdev->config);
624     }
625 }
626 
627 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
628 {
629     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
630     uint16_t val = data;
631 
632     if (addr + sizeof(val) > vdev->config_len) {
633         return;
634     }
635 
636     stw_p(vdev->config + addr, val);
637 
638     if (k->set_config) {
639         k->set_config(vdev, vdev->config);
640     }
641 }
642 
643 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
644 {
645     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
646     uint32_t val = data;
647 
648     if (addr + sizeof(val) > vdev->config_len) {
649         return;
650     }
651 
652     stl_p(vdev->config + addr, val);
653 
654     if (k->set_config) {
655         k->set_config(vdev, vdev->config);
656     }
657 }
658 
659 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
660 {
661     vdev->vq[n].pa = addr;
662     virtqueue_init(&vdev->vq[n]);
663 }
664 
665 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
666 {
667     return vdev->vq[n].pa;
668 }
669 
670 int virtio_queue_get_num(VirtIODevice *vdev, int n)
671 {
672     return vdev->vq[n].vring.num;
673 }
674 
675 int virtio_queue_get_id(VirtQueue *vq)
676 {
677     VirtIODevice *vdev = vq->vdev;
678     assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
679     return vq - &vdev->vq[0];
680 }
681 
682 void virtio_queue_notify_vq(VirtQueue *vq)
683 {
684     if (vq->vring.desc) {
685         VirtIODevice *vdev = vq->vdev;
686         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
687         vq->handle_output(vdev, vq);
688     }
689 }
690 
691 void virtio_queue_notify(VirtIODevice *vdev, int n)
692 {
693     virtio_queue_notify_vq(&vdev->vq[n]);
694 }
695 
696 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
697 {
698     return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
699         VIRTIO_NO_VECTOR;
700 }
701 
702 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
703 {
704     if (n < VIRTIO_PCI_QUEUE_MAX)
705         vdev->vq[n].vector = vector;
706 }
707 
708 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
709                             void (*handle_output)(VirtIODevice *, VirtQueue *))
710 {
711     int i;
712 
713     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
714         if (vdev->vq[i].vring.num == 0)
715             break;
716     }
717 
718     if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
719         abort();
720 
721     vdev->vq[i].vring.num = queue_size;
722     vdev->vq[i].handle_output = handle_output;
723 
724     return &vdev->vq[i];
725 }
726 
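/*
 * Editor's example: a device creates its queues during init; "s" and
 * "my_handle_rx" below are hypothetical device state and handler names:
 *
 *     s->rx_vq = virtio_add_queue(vdev, 256, my_handle_rx);
 */
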
727 void virtio_del_queue(VirtIODevice *vdev, int n)
728 {
729     if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
730         abort();
731     }
732 
733     vdev->vq[n].vring.num = 0;
734 }
735 
736 void virtio_irq(VirtQueue *vq)
737 {
738     trace_virtio_irq(vq);
739     vq->vdev->isr |= 0x01;
740     virtio_notify_vector(vq->vdev, vq->vector);
741 }
742 
743 /* Assuming a given event_idx value from the other side, if
744  * we have just incremented index from old to new,
745  * should we trigger an event? */
746 static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
747 {
748     /* Note: Xen has similar logic for notification hold-off
749      * in include/xen/interface/io/ring.h with req_event and req_prod
750      * corresponding to event_idx + 1 and new respectively.
751      * Note also that req_event and req_prod in Xen start at 1,
752      * event indexes in virtio start at 0. */
753     return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
754 }
755 
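/*
 * Editor's worked example: with old = 3, new = 8, event = 5 we get
 * (uint16_t)(8 - 5 - 1) == 2 < (uint16_t)(8 - 3) == 5, so notify; with
 * event = 10 the left side wraps to 65533 and no event is triggered.
 * Doing both subtractions in uint16_t keeps the test correct across
 * index wraparound.
 */
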
756 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
757 {
758     uint16_t old, new;
759     bool v;
760     /* We need to expose used array entries before checking used event. */
761     smp_mb();
762     /* Always notify when queue is empty (if the feature was acknowledged) */
763     if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
764          !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
765         return true;
766     }
767 
768     if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
769         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
770     }
771 
772     v = vq->signalled_used_valid;
773     vq->signalled_used_valid = true;
774     old = vq->signalled_used;
775     new = vq->signalled_used = vring_used_idx(vq);
776     return !v || vring_need_event(vring_used_event(vq), new, old);
777 }
778 
779 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
780 {
781     if (!vring_notify(vdev, vq)) {
782         return;
783     }
784 
785     trace_virtio_notify(vdev, vq);
786     vdev->isr |= 0x01;
787     virtio_notify_vector(vdev, vq->vector);
788 }
789 
790 void virtio_notify_config(VirtIODevice *vdev)
791 {
792     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
793         return;
794 
795     vdev->isr |= 0x03;
796     virtio_notify_vector(vdev, vdev->config_vector);
797 }
798 
799 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
800 {
801     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
802     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
803     int i;
804 
805     if (k->save_config) {
806         k->save_config(qbus->parent, f);
807     }
808 
809     qemu_put_8s(f, &vdev->status);
810     qemu_put_8s(f, &vdev->isr);
811     qemu_put_be16s(f, &vdev->queue_sel);
812     qemu_put_be32s(f, &vdev->guest_features);
813     qemu_put_be32(f, vdev->config_len);
814     qemu_put_buffer(f, vdev->config, vdev->config_len);
815 
816     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
817         if (vdev->vq[i].vring.num == 0)
818             break;
819     }
820 
821     qemu_put_be32(f, i);
822 
823     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
824         if (vdev->vq[i].vring.num == 0)
825             break;
826 
827         qemu_put_be32(f, vdev->vq[i].vring.num);
828         qemu_put_be64(f, vdev->vq[i].pa);
829         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
830         if (k->save_queue) {
831             k->save_queue(qbus->parent, i, f);
832         }
833     }
834 }
835 
836 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
837 {
838     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
839     VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
840     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
841     uint32_t supported_features = vbusk->get_features(qbus->parent);
842     bool bad = (val & ~supported_features) != 0;
843 
844     val &= supported_features;
845     if (k->set_features) {
846         k->set_features(vdev, val);
847     }
848     vdev->guest_features = val;
849     return bad ? -1 : 0;
850 }
851 
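/*
 * Editor's example (hypothetical bit masks): if the transport offers
 * 0x0110 and the guest acks 0x0111, the stray 0x0001 bit is masked off,
 * guest_features becomes 0x0110, and the function returns -1 so the
 * caller can report the bogus acknowledgement.
 */
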
852 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
853 {
854     int num, i, ret;
855     uint32_t features;
856     uint32_t supported_features;
857     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
858     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
859 
860     if (k->load_config) {
861         ret = k->load_config(qbus->parent, f);
862         if (ret)
863             return ret;
864     }
865 
866     qemu_get_8s(f, &vdev->status);
867     qemu_get_8s(f, &vdev->isr);
868     qemu_get_be16s(f, &vdev->queue_sel);
869     qemu_get_be32s(f, &features);
870 
871     if (virtio_set_features(vdev, features) < 0) {
872         supported_features = k->get_features(qbus->parent);
873         error_report("Features 0x%x unsupported. Allowed features: 0x%x",
874                      features, supported_features);
875         return -1;
876     }
877     vdev->config_len = qemu_get_be32(f);
878     qemu_get_buffer(f, vdev->config, vdev->config_len);
879 
880     num = qemu_get_be32(f);
881 
882     for (i = 0; i < num; i++) {
883         vdev->vq[i].vring.num = qemu_get_be32(f);
884         vdev->vq[i].pa = qemu_get_be64(f);
885         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
886         vdev->vq[i].signalled_used_valid = false;
887         vdev->vq[i].notification = true;
888 
889         if (vdev->vq[i].pa) {
890             uint16_t nheads;
891             virtqueue_init(&vdev->vq[i]);
892             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
893             /* Check it isn't doing very strange things with descriptor numbers. */
894             if (nheads > vdev->vq[i].vring.num) {
895                 error_report("VQ %d size 0x%x Guest index 0x%x "
896                              "inconsistent with Host index 0x%x: delta 0x%x",
897                              i, vdev->vq[i].vring.num,
898                              vring_avail_idx(&vdev->vq[i]),
899                              vdev->vq[i].last_avail_idx, nheads);
900                 return -1;
901             }
902         } else if (vdev->vq[i].last_avail_idx) {
903             error_report("VQ %d address 0x0 "
904                          "inconsistent with Host index 0x%x",
905                          i, vdev->vq[i].last_avail_idx);
906             return -1;
907         }
908         if (k->load_queue) {
909             ret = k->load_queue(qbus->parent, i, f);
910             if (ret)
911                 return ret;
912         }
913     }
914 
915     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
916     return 0;
917 }
918 
919 void virtio_cleanup(VirtIODevice *vdev)
920 {
921     qemu_del_vm_change_state_handler(vdev->vmstate);
922     g_free(vdev->config);
923     g_free(vdev->vq);
924 }
925 
926 static void virtio_vmstate_change(void *opaque, int running, RunState state)
927 {
928     VirtIODevice *vdev = opaque;
929     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
930     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
931     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
932     vdev->vm_running = running;
933 
934     if (backend_run) {
935         virtio_set_status(vdev, vdev->status);
936     }
937 
938     if (k->vmstate_change) {
939         k->vmstate_change(qbus->parent, backend_run);
940     }
941 
942     if (!backend_run) {
943         virtio_set_status(vdev, vdev->status);
944     }
945 }
946 
947 void virtio_init(VirtIODevice *vdev, const char *name,
948                  uint16_t device_id, size_t config_size)
949 {
950     int i;
951     vdev->device_id = device_id;
952     vdev->status = 0;
953     vdev->isr = 0;
954     vdev->queue_sel = 0;
955     vdev->config_vector = VIRTIO_NO_VECTOR;
956     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
957     vdev->vm_running = runstate_is_running();
958     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
959         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
960         vdev->vq[i].vdev = vdev;
961         vdev->vq[i].queue_index = i;
962     }
963 
964     vdev->name = name;
965     vdev->config_len = config_size;
966     if (vdev->config_len) {
967         vdev->config = g_malloc0(config_size);
968     } else {
969         vdev->config = NULL;
970     }
971     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
972                                                      vdev);
973 }
974 
975 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
976 {
977     return vdev->vq[n].vring.desc;
978 }
979 
980 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
981 {
982     return vdev->vq[n].vring.avail;
983 }
984 
985 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
986 {
987     return vdev->vq[n].vring.used;
988 }
989 
990 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
991 {
992     return vdev->vq[n].vring.desc;
993 }
994 
995 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
996 {
997     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
998 }
999 
1000 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1001 {
1002     return offsetof(VRingAvail, ring) +
1003         sizeof(uint16_t) * vdev->vq[n].vring.num;
1004 }
1005 
1006 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1007 {
1008     return offsetof(VRingUsed, ring) +
1009         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1010 }
1011 
1012 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1013 {
1014     return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1015            virtio_queue_get_used_size(vdev, n);
1016 }
1017 
1018 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1019 {
1020     return vdev->vq[n].last_avail_idx;
1021 }
1022 
1023 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1024 {
1025     vdev->vq[n].last_avail_idx = idx;
1026 }
1027 
1028 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1029 {
1030     return vdev->vq + n;
1031 }
1032 
1033 uint16_t virtio_get_queue_index(VirtQueue *vq)
1034 {
1035     return vq->queue_index;
1036 }
1037 
1038 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1039 {
1040     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1041     if (event_notifier_test_and_clear(n)) {
1042         virtio_irq(vq);
1043     }
1044 }
1045 
1046 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1047                                                 bool with_irqfd)
1048 {
1049     if (assign && !with_irqfd) {
1050         event_notifier_set_handler(&vq->guest_notifier,
1051                                    virtio_queue_guest_notifier_read);
1052     } else {
1053         event_notifier_set_handler(&vq->guest_notifier, NULL);
1054     }
1055     if (!assign) {
1056         /* Test and clear notifier before closing it,
1057          * in case poll callback didn't have time to run. */
1058         virtio_queue_guest_notifier_read(&vq->guest_notifier);
1059     }
1060 }
1061 
1062 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1063 {
1064     return &vq->guest_notifier;
1065 }
1066 
1067 static void virtio_queue_host_notifier_read(EventNotifier *n)
1068 {
1069     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1070     if (event_notifier_test_and_clear(n)) {
1071         virtio_queue_notify_vq(vq);
1072     }
1073 }
1074 
1075 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1076                                                bool set_handler)
1077 {
1078     if (assign && set_handler) {
1079         event_notifier_set_handler(&vq->host_notifier,
1080                                    virtio_queue_host_notifier_read);
1081     } else {
1082         event_notifier_set_handler(&vq->host_notifier, NULL);
1083     }
1084     if (!assign) {
1085         /* Test and clear notifier after disabling event,
1086          * in case poll callback didn't have time to run. */
1087         virtio_queue_host_notifier_read(&vq->host_notifier);
1088     }
1089 }
1090 
1091 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1092 {
1093     return &vq->host_notifier;
1094 }
1095 
1096 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1097 {
1098     if (vdev->bus_name) {
1099         g_free(vdev->bus_name);
1100         vdev->bus_name = NULL;
1101     }
1102 
1103     if (bus_name) {
1104         vdev->bus_name = g_strdup(bus_name);
1105     }
1106 }
1107 
1108 static int virtio_device_init(DeviceState *qdev)
1109 {
1110     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1111     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
1112     assert(k->init != NULL);
1113     if (k->init(vdev) < 0) {
1114         return -1;
1115     }
1116     virtio_bus_plug_device(vdev);
1117     return 0;
1118 }
1119 
1120 static int virtio_device_exit(DeviceState *qdev)
1121 {
1122     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1123 
1124     if (vdev->bus_name) {
1125         g_free(vdev->bus_name);
1126         vdev->bus_name = NULL;
1127     }
1128     return 0;
1129 }
1130 
1131 static void virtio_device_class_init(ObjectClass *klass, void *data)
1132 {
1133     /* Set the class defaults here. */
1134     DeviceClass *dc = DEVICE_CLASS(klass);
1135     dc->init = virtio_device_init;
1136     dc->exit = virtio_device_exit;
1137     dc->bus_type = TYPE_VIRTIO_BUS;
1138 }
1139 
1140 static const TypeInfo virtio_device_info = {
1141     .name = TYPE_VIRTIO_DEVICE,
1142     .parent = TYPE_DEVICE,
1143     .instance_size = sizeof(VirtIODevice),
1144     .class_init = virtio_device_class_init,
1145     .abstract = true,
1146     .class_size = sizeof(VirtioDeviceClass),
1147 };
1148 
1149 static void virtio_register_types(void)
1150 {
1151     type_register_static(&virtio_device_info);
1152 }
1153 
1154 type_init(virtio_register_types)
1155