xref: /openbmc/qemu/hw/misc/ivshmem.c (revision 63785678)
1 /*
2  * Inter-VM Shared Memory PCI device.
3  *
4  * Author:
5  *      Cam Macdonell <cam@cs.ualberta.ca>
6  *
7  * Based On: cirrus_vga.c
8  *          Copyright (c) 2004 Fabrice Bellard
9  *          Copyright (c) 2004 Makoto Suzuki (suzu)
10  *
11  *      and rtl8139.c
12  *          Copyright (c) 2006 Igor Kovalenko
13  *
14  * This code is licensed under the GNU GPL v2.
15  *
16  * Contributions after 2012-01-13 are licensed under the terms of the
17  * GNU GPL, version 2 or (at your option) any later version.
18  */
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #include "qemu/cutils.h"
22 #include "hw/hw.h"
23 #include "hw/i386/pc.h"
24 #include "hw/pci/pci.h"
25 #include "hw/pci/msi.h"
26 #include "hw/pci/msix.h"
27 #include "sysemu/kvm.h"
28 #include "migration/migration.h"
29 #include "qemu/error-report.h"
30 #include "qemu/event_notifier.h"
31 #include "qom/object_interfaces.h"
32 #include "sysemu/char.h"
33 #include "sysemu/hostmem.h"
34 #include "sysemu/qtest.h"
35 #include "qapi/visitor.h"
36 #include "exec/ram_addr.h"
37 
38 #include "hw/misc/ivshmem.h"
39 
40 #include <sys/mman.h>
41 
/* PCI vendor/device IDs the device presents in config space */
42 #define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
43 #define PCI_DEVICE_ID_IVSHMEM   0x1110
44 
/* Largest peer ID accepted in a server message (checked in process_msg()) */
45 #define IVSHMEM_MAX_PEERS UINT16_MAX
/* Bit numbers in IVShmemState.features, tested with ivshmem_has_feature() */
46 #define IVSHMEM_IOEVENTFD   0
47 #define IVSHMEM_MSI     1
48 
/* Size in bytes of the register region registered as BAR 0 */
49 #define IVSHMEM_REG_BAR_SIZE 0x100
50 
/*
 * Debug tracing.  Set IVSHMEM_DEBUG to 1 to make IVSHMEM_DPRINTF() print
 * its message to stdout, prefixed with "IVSHMEM: ".  The printf() call is
 * guarded by a plain "if", so format and arguments are compiled either way.
 */
51 #define IVSHMEM_DEBUG 0
52 #define IVSHMEM_DPRINTF(fmt, ...)                       \
53     do {                                                \
54         if (IVSHMEM_DEBUG) {                            \
55             printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
56         }                                               \
57     } while (0)
58 
/*
 * QOM type names and checked-cast macros.  All four casts yield an
 * IVShmemState; they differ only in the type name OBJECT_CHECK() verifies.
 */

/* Abstract base type shared by the concrete device variants */
59 #define TYPE_IVSHMEM_COMMON "ivshmem-common"
60 #define IVSHMEM_COMMON(obj) \
61     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)
62 
/* Variant backed by a memory backend ("memdev" link property) */
63 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
64 #define IVSHMEM_PLAIN(obj) \
65     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)
66 
67 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
68 #define IVSHMEM_DOORBELL(obj) \
69     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)
70 
71 #define TYPE_IVSHMEM "ivshmem"
72 #define IVSHMEM(obj) \
73     OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
74 
/*
 * Interrupt state kept for one peer ID: the eventfds the server sent for
 * that peer, one per vector, in the order they arrived.
 */
75 typedef struct Peer {
    /* number of entries of @eventfds received so far */
76     int nb_eventfds;
    /* array allocated in resize_peers() with IVShmemState.vectors slots */
77     EventNotifier *eventfds;
78 } Peer;
79 
/*
 * Callback data for one interrupt vector of this device.  A pointer to the
 * entry is handed to ivshmem_vector_notify(), which recovers the vector
 * number from the entry's position in IVShmemState.msi_vectors[].
 */
80 typedef struct MSIVector {
    /* owning device; NULL while the vector has not been set up */
81     PCIDevice *pdev;
    /* value returned by kvm_irqchip_add_msi_route() for this vector */
82     int virq;
83 } MSIVector;
84 
/* Device state shared by all ivshmem device variants */
85 typedef struct IVShmemState {
86     /*< private >*/
87     PCIDevice parent_obj;
88     /*< public >*/
89 
    /* bit set indexed by IVSHMEM_IOEVENTFD / IVSHMEM_MSI */
90     uint32_t features;
91 
92     /* exactly one of these two may be set */
    /*
     * The hostmem path in realize sets up no interrupts, while the
     * server_chr path receives eventfds and calls
     * ivshmem_setup_interrupts().
     */
93     HostMemoryBackend *hostmem; /* without interrupts */
94     CharDriverState *server_chr; /* with interrupts */
95 
96     /* registers */
97     uint32_t intrmask;
98     uint32_t intrstatus;
    /* own peer ID as sent by the server; readable through IVPOSITION */
99     int vm_id;
100 
101     /* BARs */
102     MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
103     MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
104     MemoryRegion server_bar2;   /* used with server_chr */
105 
106     /* interrupt support */
107     Peer *peers;
108     int nb_peers;               /* space in @peers[] */
    /* number of interrupt vectors; also the size of each Peer.eventfds[] */
109     uint32_t vectors;
110     MSIVector *msi_vectors;
111     uint64_t msg_buf;           /* buffer for receiving server messages */
112     int msg_buffered_bytes;     /* #bytes in @msg_buf */
113 
114     /* migration stuff */
115     OnOffAuto master;
116     Error *migration_blocker;
117 
118     /* legacy cruft */
119     char *role;
120     char *shmobj;
121     char *sizearg;
    /* SIZE_MAX means "not set": process_msg_shmem() then maps the whole file */
122     size_t legacy_size;
    /* when zero, BAR 2 is registered as a 64 bit memory BAR */
123     uint32_t not_legacy_32bit;
124 } IVShmemState;
125 
126 /* registers for the Inter-VM shared memory device */
/* Values are byte offsets into BAR 0; each register is 32 bits wide. */
127 enum ivshmem_registers {
    /* read/write interrupt mask */
128     INTRMASK = 0,
    /* interrupt status; reading it clears it */
129     INTRSTATUS = 4,
    /* read-only: this device's peer ID (vm_id) */
130     IVPOSITION = 8,
    /* write-only: peer ID in bits 31..16, vector in bits 7..0 */
131     DOORBELL = 12,
132 };
133 
134 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
135                                                     unsigned int feature) {
136     return (ivs->features & (1 << feature));
137 }
138 
139 static inline bool ivshmem_is_master(IVShmemState *s)
140 {
141     assert(s->master != ON_OFF_AUTO_AUTO);
142     return s->master == ON_OFF_AUTO_ON;
143 }
144 
145 static void ivshmem_update_irq(IVShmemState *s)
146 {
147     PCIDevice *d = PCI_DEVICE(s);
148     uint32_t isr = s->intrstatus & s->intrmask;
149 
150     /*
151      * Do nothing unless the device actually uses INTx.  Here's how
152      * the device variants signal interrupts, what they put in PCI
153      * config space:
154      * Device variant    Interrupt  Interrupt Pin  MSI-X cap.
155      * ivshmem-plain         none            0         no
156      * ivshmem-doorbell     MSI-X            1        yes(1)
157      * ivshmem,msi=off       INTx            1         no
158      * ivshmem,msi=on       MSI-X            1(2)     yes(1)
159      * (1) if guest enabled MSI-X
160      * (2) the device lies
161      * Leads to the condition for doing nothing:
162      */
163     if (ivshmem_has_feature(s, IVSHMEM_MSI)
164         || !d->config[PCI_INTERRUPT_PIN]) {
165         return;
166     }
167 
168     /* don't print ISR resets */
169     if (isr) {
170         IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
171                         isr ? 1 : 0, s->intrstatus, s->intrmask);
172     }
173 
174     pci_set_irq(d, isr != 0);
175 }
176 
177 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
178 {
179     IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
180 
181     s->intrmask = val;
182     ivshmem_update_irq(s);
183 }
184 
185 static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
186 {
187     uint32_t ret = s->intrmask;
188 
189     IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
190     return ret;
191 }
192 
193 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
194 {
195     IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
196 
197     s->intrstatus = val;
198     ivshmem_update_irq(s);
199 }
200 
201 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
202 {
203     uint32_t ret = s->intrstatus;
204 
205     /* reading ISR clears all interrupts */
206     s->intrstatus = 0;
207     ivshmem_update_irq(s);
208     return ret;
209 }
210 
211 static void ivshmem_io_write(void *opaque, hwaddr addr,
212                              uint64_t val, unsigned size)
213 {
214     IVShmemState *s = opaque;
215 
216     uint16_t dest = val >> 16;
217     uint16_t vector = val & 0xff;
218 
219     addr &= 0xfc;
220 
221     IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
222     switch (addr)
223     {
224         case INTRMASK:
225             ivshmem_IntrMask_write(s, val);
226             break;
227 
228         case INTRSTATUS:
229             ivshmem_IntrStatus_write(s, val);
230             break;
231 
232         case DOORBELL:
233             /* check that dest VM ID is reasonable */
234             if (dest >= s->nb_peers) {
235                 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
236                 break;
237             }
238 
239             /* check doorbell range */
240             if (vector < s->peers[dest].nb_eventfds) {
241                 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
242                 event_notifier_set(&s->peers[dest].eventfds[vector]);
243             } else {
244                 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
245                                 vector, dest);
246             }
247             break;
248         default:
249             IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
250     }
251 }
252 
253 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
254                                 unsigned size)
255 {
256 
257     IVShmemState *s = opaque;
258     uint32_t ret;
259 
260     switch (addr)
261     {
262         case INTRMASK:
263             ret = ivshmem_IntrMask_read(s);
264             break;
265 
266         case INTRSTATUS:
267             ret = ivshmem_IntrStatus_read(s);
268             break;
269 
270         case IVPOSITION:
271             ret = s->vm_id;
272             break;
273 
274         default:
275             IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
276             ret = 0;
277     }
278 
279     return ret;
280 }
281 
/*
 * Access callbacks for the register region (BAR 0).  The handlers are
 * declared to implement 4-byte accesses only.
 */
282 static const MemoryRegionOps ivshmem_mmio_ops = {
283     .read = ivshmem_io_read,
284     .write = ivshmem_io_write,
285     .endianness = DEVICE_NATIVE_ENDIAN,
286     .impl = {
287         .min_access_size = 4,
288         .max_access_size = 4,
289     },
290 };
291 
292 static void ivshmem_vector_notify(void *opaque)
293 {
294     MSIVector *entry = opaque;
295     PCIDevice *pdev = entry->pdev;
296     IVShmemState *s = IVSHMEM_COMMON(pdev);
297     int vector = entry - s->msi_vectors;
298     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
299 
300     if (!event_notifier_test_and_clear(n)) {
301         return;
302     }
303 
304     IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
305     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
306         if (msix_enabled(pdev)) {
307             msix_notify(pdev, vector);
308         }
309     } else {
310         ivshmem_IntrStatus_write(s, 1);
311     }
312 }
313 
314 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
315                                  MSIMessage msg)
316 {
317     IVShmemState *s = IVSHMEM_COMMON(dev);
318     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
319     MSIVector *v = &s->msi_vectors[vector];
320     int ret;
321 
322     IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
323 
324     ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
325     if (ret < 0) {
326         return ret;
327     }
328 
329     return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
330 }
331 
332 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
333 {
334     IVShmemState *s = IVSHMEM_COMMON(dev);
335     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
336     int ret;
337 
338     IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
339 
340     ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
341                                                 s->msi_vectors[vector].virq);
342     if (ret != 0) {
343         error_report("remove_irqfd_notifier_gsi failed");
344     }
345 }
346 
347 static void ivshmem_vector_poll(PCIDevice *dev,
348                                 unsigned int vector_start,
349                                 unsigned int vector_end)
350 {
351     IVShmemState *s = IVSHMEM_COMMON(dev);
352     unsigned int vector;
353 
354     IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
355 
356     vector_end = MIN(vector_end, s->vectors);
357 
358     for (vector = vector_start; vector < vector_end; vector++) {
359         EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
360 
361         if (!msix_is_masked(dev, vector)) {
362             continue;
363         }
364 
365         if (event_notifier_test_and_clear(notifier)) {
366             msix_set_pending(dev, vector);
367         }
368     }
369 }
370 
371 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
372                                  int vector)
373 {
374     int eventfd = event_notifier_get_fd(n);
375 
376     assert(!s->msi_vectors[vector].pdev);
377     s->msi_vectors[vector].pdev = PCI_DEVICE(s);
378 
379     qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
380                         NULL, &s->msi_vectors[vector]);
381 }
382 
383 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
384 {
385     memory_region_add_eventfd(&s->ivshmem_mmio,
386                               DOORBELL,
387                               4,
388                               true,
389                               (posn << 16) | i,
390                               &s->peers[posn].eventfds[i]);
391 }
392 
393 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
394 {
395     memory_region_del_eventfd(&s->ivshmem_mmio,
396                               DOORBELL,
397                               4,
398                               true,
399                               (posn << 16) | i,
400                               &s->peers[posn].eventfds[i]);
401 }
402 
403 static void close_peer_eventfds(IVShmemState *s, int posn)
404 {
405     int i, n;
406 
407     assert(posn >= 0 && posn < s->nb_peers);
408     n = s->peers[posn].nb_eventfds;
409 
410     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
411         memory_region_transaction_begin();
412         for (i = 0; i < n; i++) {
413             ivshmem_del_eventfd(s, posn, i);
414         }
415         memory_region_transaction_commit();
416     }
417 
418     for (i = 0; i < n; i++) {
419         event_notifier_cleanup(&s->peers[posn].eventfds[i]);
420     }
421 
422     g_free(s->peers[posn].eventfds);
423     s->peers[posn].nb_eventfds = 0;
424 }
425 
426 static void resize_peers(IVShmemState *s, int nb_peers)
427 {
428     int old_nb_peers = s->nb_peers;
429     int i;
430 
431     assert(nb_peers > old_nb_peers);
432     IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
433 
434     s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
435     s->nb_peers = nb_peers;
436 
437     for (i = old_nb_peers; i < nb_peers; i++) {
438         s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
439         s->peers[i].nb_eventfds = 0;
440     }
441 }
442 
443 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
444                                      Error **errp)
445 {
446     PCIDevice *pdev = PCI_DEVICE(s);
447     MSIMessage msg = msix_get_message(pdev, vector);
448     int ret;
449 
450     IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
451     assert(!s->msi_vectors[vector].pdev);
452 
453     ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
454     if (ret < 0) {
455         error_setg(errp, "kvm_irqchip_add_msi_route failed");
456         return;
457     }
458 
459     s->msi_vectors[vector].virq = ret;
460     s->msi_vectors[vector].pdev = pdev;
461 }
462 
463 static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
464 {
465     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
466     bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
467         ivshmem_has_feature(s, IVSHMEM_MSI);
468     PCIDevice *pdev = PCI_DEVICE(s);
469     Error *err = NULL;
470 
471     IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
472 
473     if (!with_irqfd) {
474         IVSHMEM_DPRINTF("with eventfd\n");
475         watch_vector_notifier(s, n, vector);
476     } else if (msix_enabled(pdev)) {
477         IVSHMEM_DPRINTF("with irqfd\n");
478         ivshmem_add_kvm_msi_virq(s, vector, &err);
479         if (err) {
480             error_propagate(errp, err);
481             return;
482         }
483 
484         if (!msix_is_masked(pdev, vector)) {
485             kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
486                                                s->msi_vectors[vector].virq);
487             /* TODO handle error */
488         }
489     } else {
490         /* it will be delayed until msix is enabled, in write_config */
491         IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
492     }
493 }
494 
495 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
496 {
497     struct stat buf;
498     size_t size;
499     void *ptr;
500 
501     if (s->ivshmem_bar2) {
502         error_setg(errp, "server sent unexpected shared memory message");
503         close(fd);
504         return;
505     }
506 
507     if (fstat(fd, &buf) < 0) {
508         error_setg_errno(errp, errno,
509             "can't determine size of shared memory sent by server");
510         close(fd);
511         return;
512     }
513 
514     size = buf.st_size;
515 
516     /* Legacy cruft */
517     if (s->legacy_size != SIZE_MAX) {
518         if (size < s->legacy_size) {
519             error_setg(errp, "server sent only %zd bytes of shared memory",
520                        (size_t)buf.st_size);
521             close(fd);
522             return;
523         }
524         size = s->legacy_size;
525     }
526 
527     /* mmap the region and map into the BAR2 */
528     ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
529     if (ptr == MAP_FAILED) {
530         error_setg_errno(errp, errno, "Failed to mmap shared memory");
531         close(fd);
532         return;
533     }
534     memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
535                                "ivshmem.bar2", size, ptr);
536     qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd);
537     s->ivshmem_bar2 = &s->server_bar2;
538 }
539 
540 static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
541                                    Error **errp)
542 {
543     IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
544     if (posn >= s->nb_peers || posn == s->vm_id) {
545         error_setg(errp, "invalid peer %d", posn);
546         return;
547     }
548     close_peer_eventfds(s, posn);
549 }
550 
551 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
552                                 Error **errp)
553 {
554     Peer *peer = &s->peers[posn];
555     int vector;
556 
557     /*
558      * The N-th connect message for this peer comes with the file
559      * descriptor for vector N-1.  Count messages to find the vector.
560      */
561     if (peer->nb_eventfds >= s->vectors) {
562         error_setg(errp, "Too many eventfd received, device has %d vectors",
563                    s->vectors);
564         close(fd);
565         return;
566     }
567     vector = peer->nb_eventfds++;
568 
569     IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
570     event_notifier_init_fd(&peer->eventfds[vector], fd);
571     fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
572 
573     if (posn == s->vm_id) {
574         setup_interrupt(s, vector, errp);
575         /* TODO do we need to handle the error? */
576     }
577 
578     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
579         ivshmem_add_eventfd(s, posn, vector);
580     }
581 }
582 
583 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
584 {
585     IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
586 
587     if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
588         error_setg(errp, "server sent invalid message %" PRId64, msg);
589         close(fd);
590         return;
591     }
592 
593     if (msg == -1) {
594         process_msg_shmem(s, fd, errp);
595         return;
596     }
597 
598     if (msg >= s->nb_peers) {
599         resize_peers(s, msg + 1);
600     }
601 
602     if (fd >= 0) {
603         process_msg_connect(s, msg, fd, errp);
604     } else {
605         process_msg_disconnect(s, msg, errp);
606     }
607 }
608 
609 static int ivshmem_can_receive(void *opaque)
610 {
611     IVShmemState *s = opaque;
612 
613     assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
614     return sizeof(s->msg_buf) - s->msg_buffered_bytes;
615 }
616 
617 static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
618 {
619     IVShmemState *s = opaque;
620     Error *err = NULL;
621     int fd;
622     int64_t msg;
623 
624     assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
625     memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
626     s->msg_buffered_bytes += size;
627     if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
628         return;
629     }
630     msg = le64_to_cpu(s->msg_buf);
631     s->msg_buffered_bytes = 0;
632 
633     fd = qemu_chr_fe_get_msgfd(s->server_chr);
634     IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
635 
636     process_msg(s, msg, fd, &err);
637     if (err) {
638         error_report_err(err);
639     }
640 }
641 
642 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
643 {
644     int64_t msg;
645     int n, ret;
646 
647     n = 0;
648     do {
649         ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n,
650                                  sizeof(msg) - n);
651         if (ret < 0 && ret != -EINTR) {
652             error_setg_errno(errp, -ret, "read from server failed");
653             return INT64_MIN;
654         }
655         n += ret;
656     } while (n < sizeof(msg));
657 
658     *pfd = qemu_chr_fe_get_msgfd(s->server_chr);
659     return msg;
660 }
661 
662 static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
663 {
664     Error *err = NULL;
665     int64_t msg;
666     int fd;
667 
668     msg = ivshmem_recv_msg(s, &fd, &err);
669     if (err) {
670         error_propagate(errp, err);
671         return;
672     }
673     if (msg != IVSHMEM_PROTOCOL_VERSION) {
674         error_setg(errp, "server sent version %" PRId64 ", expecting %d",
675                    msg, IVSHMEM_PROTOCOL_VERSION);
676         return;
677     }
678     if (fd != -1) {
679         error_setg(errp, "server sent invalid version message");
680         return;
681     }
682 
683     /*
684      * ivshmem-server sends the remaining initial messages in a fixed
685      * order, but the device has always accepted them in any order.
686      * Stay as compatible as practical, just in case people use
687      * servers that behave differently.
688      */
689 
690     /*
691      * ivshmem_device_spec.txt has always required the ID message
692      * right here, and ivshmem-server has always complied.  However,
693      * older versions of the device accepted it out of order, but
694      * broke when an interrupt setup message arrived before it.
695      */
696     msg = ivshmem_recv_msg(s, &fd, &err);
697     if (err) {
698         error_propagate(errp, err);
699         return;
700     }
701     if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
702         error_setg(errp, "server sent invalid ID message");
703         return;
704     }
705     s->vm_id = msg;
706 
707     /*
708      * Receive more messages until we got shared memory.
709      */
710     do {
711         msg = ivshmem_recv_msg(s, &fd, &err);
712         if (err) {
713             error_propagate(errp, err);
714             return;
715         }
716         process_msg(s, msg, fd, &err);
717         if (err) {
718             error_propagate(errp, err);
719             return;
720         }
721     } while (msg != -1);
722 
723     /*
724      * This function must either map the shared memory or fail.  The
725      * loop above ensures that: it terminates normally only after it
726      * successfully processed the server's shared memory message.
727      * Assert that actually mapped the shared memory:
728      */
729     assert(s->ivshmem_bar2);
730 }
731 
732 /* Select the MSI-X vectors used by device.
733  * ivshmem maps events to vectors statically, so
734  * we just enable all vectors on init and after reset. */
735 static void ivshmem_msix_vector_use(IVShmemState *s)
736 {
737     PCIDevice *d = PCI_DEVICE(s);
738     int i;
739 
740     for (i = 0; i < s->vectors; i++) {
741         msix_vector_use(d, i);
742     }
743 }
744 
745 static void ivshmem_reset(DeviceState *d)
746 {
747     IVShmemState *s = IVSHMEM_COMMON(d);
748 
749     s->intrstatus = 0;
750     s->intrmask = 0;
751     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
752         ivshmem_msix_vector_use(s);
753     }
754 }
755 
756 static int ivshmem_setup_interrupts(IVShmemState *s)
757 {
758     /* allocate QEMU callback data for receiving interrupts */
759     s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
760 
761     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
762         if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
763             return -1;
764         }
765 
766         IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
767         ivshmem_msix_vector_use(s);
768     }
769 
770     return 0;
771 }
772 
773 static void ivshmem_enable_irqfd(IVShmemState *s)
774 {
775     PCIDevice *pdev = PCI_DEVICE(s);
776     int i;
777 
778     for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
779         Error *err = NULL;
780 
781         ivshmem_add_kvm_msi_virq(s, i, &err);
782         if (err) {
783             error_report_err(err);
784             /* TODO do we need to handle the error? */
785         }
786     }
787 
788     if (msix_set_vector_notifiers(pdev,
789                                   ivshmem_vector_unmask,
790                                   ivshmem_vector_mask,
791                                   ivshmem_vector_poll)) {
792         error_report("ivshmem: msix_set_vector_notifiers failed");
793     }
794 }
795 
796 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
797 {
798     IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
799 
800     if (s->msi_vectors[vector].pdev == NULL) {
801         return;
802     }
803 
804     /* it was cleaned when masked in the frontend. */
805     kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
806 
807     s->msi_vectors[vector].pdev = NULL;
808 }
809 
810 static void ivshmem_disable_irqfd(IVShmemState *s)
811 {
812     PCIDevice *pdev = PCI_DEVICE(s);
813     int i;
814 
815     for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
816         ivshmem_remove_kvm_msi_virq(s, i);
817     }
818 
819     msix_unset_vector_notifiers(pdev);
820 }
821 
822 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
823                                  uint32_t val, int len)
824 {
825     IVShmemState *s = IVSHMEM_COMMON(pdev);
826     int is_enabled, was_enabled = msix_enabled(pdev);
827 
828     pci_default_write_config(pdev, address, val, len);
829     is_enabled = msix_enabled(pdev);
830 
831     if (kvm_msi_via_irqfd_enabled()) {
832         if (!was_enabled && is_enabled) {
833             ivshmem_enable_irqfd(s);
834         } else if (was_enabled && !is_enabled) {
835             ivshmem_disable_irqfd(s);
836         }
837     }
838 }
839 
/*
 * Realize callback shared by all device variants.
 *
 * Registers the register BAR (BAR 0), obtains the shared memory either
 * from the memory backend or synchronously from the server, registers it
 * as BAR 2, resolves an "auto" master property from the peer ID, and
 * blocks migration for non-master devices.
 *
 * On failure @errp is set and the function returns early; nothing set up
 * before the failure is undone here.
 */
840 static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
841 {
842     IVShmemState *s = IVSHMEM_COMMON(dev);
843     Error *err = NULL;
844     uint8_t *pci_conf;
845     uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
846         PCI_BASE_ADDRESS_MEM_PREFETCH;
847 
848     /* IRQFD requires MSI */
849     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
850         !ivshmem_has_feature(s, IVSHMEM_MSI)) {
851         error_setg(errp, "ioeventfd/irqfd requires MSI");
852         return;
853     }
854 
855     pci_conf = dev->config;
856     pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
857 
858     memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
859                           "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
860 
861     /* region for registers */
862     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
863                      &s->ivshmem_mmio);
864 
    /* BAR 2 is a 64 bit BAR unless the legacy 32 bit flag is set */
865     if (!s->not_legacy_32bit) {
866         attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
867     }
868 
869     if (s->hostmem != NULL) {
870         IVSHMEM_DPRINTF("using hostmem\n");
871 
872         s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem,
873                                                          &error_abort);
874     } else {
875         IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
876                         s->server_chr->filename);
877 
878         /* we allocate enough space for 16 peers and grow as needed */
879         resize_peers(s, 16);
880 
881         /*
882          * Receive setup messages from server synchronously.
883          * Older versions did it asynchronously, but that creates a
884          * number of entertaining race conditions.
885          */
        /*
         * NOTE(review): s->msi_vectors is allocated only further down, in
         * ivshmem_setup_interrupts().  A connect message for our own ID
         * processed during this synchronous phase would reach
         * setup_interrupt() before that allocation -- confirm that the
         * server never sends one before the shared memory message.
         */
886         ivshmem_recv_setup(s, &err);
887         if (err) {
888             error_propagate(errp, err);
889             return;
890         }
891 
892         if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
893             error_setg(errp,
894                        "master must connect to the server before any peers");
895             return;
896         }
897 
        /* from now on, server messages are handled asynchronously */
898         qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
899                               ivshmem_read, NULL, s);
900 
901         if (ivshmem_setup_interrupts(s) < 0) {
902             error_setg(errp, "failed to initialize interrupts");
903             return;
904         }
905     }
906 
907     vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
908     pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2);
909 
    /* "auto" resolves to master exactly when our peer ID is zero */
910     if (s->master == ON_OFF_AUTO_AUTO) {
911         s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
912     }
913 
914     if (!ivshmem_is_master(s)) {
915         error_setg(&s->migration_blocker,
916                    "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
917         migrate_add_blocker(s->migration_blocker);
918     }
919 }
920 
921 static void ivshmem_exit(PCIDevice *dev)
922 {
923     IVShmemState *s = IVSHMEM_COMMON(dev);
924     int i;
925 
926     if (s->migration_blocker) {
927         migrate_del_blocker(s->migration_blocker);
928         error_free(s->migration_blocker);
929     }
930 
931     if (memory_region_is_mapped(s->ivshmem_bar2)) {
932         if (!s->hostmem) {
933             void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
934             int fd;
935 
936             if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
937                 error_report("Failed to munmap shared memory %s",
938                              strerror(errno));
939             }
940 
941             fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2));
942             close(fd);
943         }
944 
945         vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
946     }
947 
948     if (s->peers) {
949         for (i = 0; i < s->nb_peers; i++) {
950             close_peer_eventfds(s, i);
951         }
952         g_free(s->peers);
953     }
954 
955     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
956         msix_uninit_exclusive_bar(dev);
957     }
958 
959     g_free(s->msi_vectors);
960 }
961 
962 static int ivshmem_pre_load(void *opaque)
963 {
964     IVShmemState *s = opaque;
965 
966     if (!ivshmem_is_master(s)) {
967         error_report("'peer' devices are not migratable");
968         return -EINVAL;
969     }
970 
971     return 0;
972 }
973 
974 static int ivshmem_post_load(void *opaque, int version_id)
975 {
976     IVShmemState *s = opaque;
977 
978     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
979         ivshmem_msix_vector_use(s);
980     }
981     return 0;
982 }
983 
984 static void ivshmem_common_class_init(ObjectClass *klass, void *data)
985 {
986     DeviceClass *dc = DEVICE_CLASS(klass);
987     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
988 
989     k->realize = ivshmem_common_realize;
990     k->exit = ivshmem_exit;
991     k->config_write = ivshmem_write_config;
992     k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
993     k->device_id = PCI_DEVICE_ID_IVSHMEM;
994     k->class_id = PCI_CLASS_MEMORY_RAM;
995     k->revision = 1;
996     dc->reset = ivshmem_reset;
997     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
998     dc->desc = "Inter-VM shared memory";
999 }
1000 
/*
 * QOM type description of the abstract base type.  It cannot be
 * instantiated itself; the concrete device variants name it as parent.
 */
1001 static const TypeInfo ivshmem_common_info = {
1002     .name          = TYPE_IVSHMEM_COMMON,
1003     .parent        = TYPE_PCI_DEVICE,
1004     .instance_size = sizeof(IVShmemState),
1005     .abstract      = true,
1006     .class_init    = ivshmem_common_class_init,
1007 };
1008 
1009 static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
1010                                          Object *val, Error **errp)
1011 {
1012     MemoryRegion *mr;
1013 
1014     mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort);
1015     if (memory_region_is_mapped(mr)) {
1016         char *path = object_get_canonical_path_component(val);
1017         error_setg(errp, "can't use already busy memdev: %s", path);
1018         g_free(path);
1019     } else {
1020         qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
1021     }
1022 }
1023 
1024 static const VMStateDescription ivshmem_plain_vmsd = {
1025     .name = TYPE_IVSHMEM_PLAIN,
1026     .version_id = 0,
1027     .minimum_version_id = 0,
1028     .pre_load = ivshmem_pre_load,
1029     .post_load = ivshmem_post_load,
1030     .fields = (VMStateField[]) {
1031         VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1032         VMSTATE_UINT32(intrstatus, IVShmemState),
1033         VMSTATE_UINT32(intrmask, IVShmemState),
1034         VMSTATE_END_OF_LIST()
1035     },
1036 };
1037 
1038 static Property ivshmem_plain_properties[] = {
1039     DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1040     DEFINE_PROP_END_OF_LIST(),
1041 };
1042 
1043 static void ivshmem_plain_init(Object *obj)
1044 {
1045     IVShmemState *s = IVSHMEM_PLAIN(obj);
1046 
1047     object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND,
1048                              (Object **)&s->hostmem,
1049                              ivshmem_check_memdev_is_busy,
1050                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
1051                              &error_abort);
1052 }
1053 
1054 static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1055 {
1056     DeviceClass *dc = DEVICE_CLASS(klass);
1057 
1058     dc->props = ivshmem_plain_properties;
1059     dc->vmsd = &ivshmem_plain_vmsd;
1060 }
1061 
1062 static const TypeInfo ivshmem_plain_info = {
1063     .name          = TYPE_IVSHMEM_PLAIN,
1064     .parent        = TYPE_IVSHMEM_COMMON,
1065     .instance_size = sizeof(IVShmemState),
1066     .instance_init = ivshmem_plain_init,
1067     .class_init    = ivshmem_plain_class_init,
1068 };
1069 
1070 static const VMStateDescription ivshmem_doorbell_vmsd = {
1071     .name = TYPE_IVSHMEM_DOORBELL,
1072     .version_id = 0,
1073     .minimum_version_id = 0,
1074     .pre_load = ivshmem_pre_load,
1075     .post_load = ivshmem_post_load,
1076     .fields = (VMStateField[]) {
1077         VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1078         VMSTATE_MSIX(parent_obj, IVShmemState),
1079         VMSTATE_UINT32(intrstatus, IVShmemState),
1080         VMSTATE_UINT32(intrmask, IVShmemState),
1081         VMSTATE_END_OF_LIST()
1082     },
1083 };
1084 
1085 static Property ivshmem_doorbell_properties[] = {
1086     DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1087     DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1088     DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1089                     true),
1090     DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1091     DEFINE_PROP_END_OF_LIST(),
1092 };
1093 
1094 static void ivshmem_doorbell_init(Object *obj)
1095 {
1096     IVShmemState *s = IVSHMEM_DOORBELL(obj);
1097 
1098     s->features |= (1 << IVSHMEM_MSI);
1099     s->legacy_size = SIZE_MAX;  /* whatever the server sends */
1100 }
1101 
1102 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1103 {
1104     DeviceClass *dc = DEVICE_CLASS(klass);
1105 
1106     dc->props = ivshmem_doorbell_properties;
1107     dc->vmsd = &ivshmem_doorbell_vmsd;
1108 }
1109 
1110 static const TypeInfo ivshmem_doorbell_info = {
1111     .name          = TYPE_IVSHMEM_DOORBELL,
1112     .parent        = TYPE_IVSHMEM_COMMON,
1113     .instance_size = sizeof(IVShmemState),
1114     .instance_init = ivshmem_doorbell_init,
1115     .class_init    = ivshmem_doorbell_class_init,
1116 };
1117 
1118 static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
1119 {
1120     IVShmemState *s = opaque;
1121     PCIDevice *pdev = PCI_DEVICE(s);
1122     int ret;
1123 
1124     IVSHMEM_DPRINTF("ivshmem_load_old\n");
1125 
1126     if (version_id != 0) {
1127         return -EINVAL;
1128     }
1129 
1130     ret = ivshmem_pre_load(s);
1131     if (ret) {
1132         return ret;
1133     }
1134 
1135     ret = pci_device_load(pdev, f);
1136     if (ret) {
1137         return ret;
1138     }
1139 
1140     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
1141         msix_load(pdev, f);
1142         ivshmem_msix_vector_use(s);
1143     } else {
1144         s->intrstatus = qemu_get_be32(f);
1145         s->intrmask = qemu_get_be32(f);
1146     }
1147 
1148     return 0;
1149 }
1150 
1151 static bool test_msix(void *opaque, int version_id)
1152 {
1153     IVShmemState *s = opaque;
1154 
1155     return ivshmem_has_feature(s, IVSHMEM_MSI);
1156 }
1157 
/* VMState field test: logical negation of test_msix(). */
static bool test_no_msix(void *opaque, int version_id)
{
    bool uses_msix = test_msix(opaque, version_id);

    return !uses_msix;
}
1162 
1163 static const VMStateDescription ivshmem_vmsd = {
1164     .name = "ivshmem",
1165     .version_id = 1,
1166     .minimum_version_id = 1,
1167     .pre_load = ivshmem_pre_load,
1168     .post_load = ivshmem_post_load,
1169     .fields = (VMStateField[]) {
1170         VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1171 
1172         VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix),
1173         VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix),
1174         VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix),
1175 
1176         VMSTATE_END_OF_LIST()
1177     },
1178     .load_state_old = ivshmem_load_old,
1179     .minimum_version_id_old = 0
1180 };
1181 
1182 static Property ivshmem_properties[] = {
1183     DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1184     DEFINE_PROP_STRING("size", IVShmemState, sizearg),
1185     DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1186     DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1187                     false),
1188     DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
1189     DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
1190     DEFINE_PROP_STRING("role", IVShmemState, role),
1191     DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1),
1192     DEFINE_PROP_END_OF_LIST(),
1193 };
1194 
1195 static void desugar_shm(IVShmemState *s)
1196 {
1197     Object *obj;
1198     char *path;
1199 
1200     obj = object_new("memory-backend-file");
1201     path = g_strdup_printf("/dev/shm/%s", s->shmobj);
1202     object_property_set_str(obj, path, "mem-path", &error_abort);
1203     g_free(path);
1204     object_property_set_int(obj, s->legacy_size, "size", &error_abort);
1205     object_property_set_bool(obj, true, "share", &error_abort);
1206     object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
1207                               &error_abort);
1208     user_creatable_complete(obj, &error_abort);
1209     s->hostmem = MEMORY_BACKEND(obj);
1210 }
1211 
1212 static void ivshmem_realize(PCIDevice *dev, Error **errp)
1213 {
1214     IVShmemState *s = IVSHMEM_COMMON(dev);
1215 
1216     if (!qtest_enabled()) {
1217         error_report("ivshmem is deprecated, please use ivshmem-plain"
1218                      " or ivshmem-doorbell instead");
1219     }
1220 
1221     if (!!s->server_chr + !!s->shmobj != 1) {
1222         error_setg(errp, "You must specify either 'shm' or 'chardev'");
1223         return;
1224     }
1225 
1226     if (s->sizearg == NULL) {
1227         s->legacy_size = 4 << 20; /* 4 MB default */
1228     } else {
1229         char *end;
1230         int64_t size = qemu_strtosz(s->sizearg, &end);
1231         if (size < 0 || (size_t)size != size || *end != '\0'
1232             || !is_power_of_2(size)) {
1233             error_setg(errp, "Invalid size %s", s->sizearg);
1234             return;
1235         }
1236         s->legacy_size = size;
1237     }
1238 
1239     /* check that role is reasonable */
1240     if (s->role) {
1241         if (strncmp(s->role, "peer", 5) == 0) {
1242             s->master = ON_OFF_AUTO_OFF;
1243         } else if (strncmp(s->role, "master", 7) == 0) {
1244             s->master = ON_OFF_AUTO_ON;
1245         } else {
1246             error_setg(errp, "'role' must be 'peer' or 'master'");
1247             return;
1248         }
1249     } else {
1250         s->master = ON_OFF_AUTO_AUTO;
1251     }
1252 
1253     if (s->shmobj) {
1254         desugar_shm(s);
1255     }
1256 
1257     /*
1258      * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a
1259      * bald-faced lie then.  But it's a backwards compatible lie.
1260      */
1261     pci_config_set_interrupt_pin(dev->config, 1);
1262 
1263     ivshmem_common_realize(dev, errp);
1264 }
1265 
1266 static void ivshmem_class_init(ObjectClass *klass, void *data)
1267 {
1268     DeviceClass *dc = DEVICE_CLASS(klass);
1269     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1270 
1271     k->realize = ivshmem_realize;
1272     k->revision = 0;
1273     dc->desc = "Inter-VM shared memory (legacy)";
1274     dc->props = ivshmem_properties;
1275     dc->vmsd = &ivshmem_vmsd;
1276 }
1277 
1278 static const TypeInfo ivshmem_info = {
1279     .name          = TYPE_IVSHMEM,
1280     .parent        = TYPE_IVSHMEM_COMMON,
1281     .instance_size = sizeof(IVShmemState),
1282     .class_init    = ivshmem_class_init,
1283 };
1284 
1285 static void ivshmem_register_types(void)
1286 {
1287     type_register_static(&ivshmem_common_info);
1288     type_register_static(&ivshmem_plain_info);
1289     type_register_static(&ivshmem_doorbell_info);
1290     type_register_static(&ivshmem_info);
1291 }
1292 
1293 type_init(ivshmem_register_types)
1294