xref: /openbmc/qemu/hw/misc/ivshmem.c (revision c5a5839856119a3644dcc0775a046ed0ee3081c3)
1 /*
2  * Inter-VM Shared Memory PCI device.
3  *
4  * Author:
5  *      Cam Macdonell <cam@cs.ualberta.ca>
6  *
7  * Based On: cirrus_vga.c
8  *          Copyright (c) 2004 Fabrice Bellard
9  *          Copyright (c) 2004 Makoto Suzuki (suzu)
10  *
11  *      and rtl8139.c
12  *          Copyright (c) 2006 Igor Kovalenko
13  *
14  * This code is licensed under the GNU GPL v2.
15  *
16  * Contributions after 2012-01-13 are licensed under the terms of the
17  * GNU GPL, version 2 or (at your option) any later version.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/units.h"
22 #include "qapi/error.h"
23 #include "qemu/cutils.h"
24 #include "hw/pci/pci.h"
25 #include "hw/qdev-properties.h"
26 #include "hw/pci/msi.h"
27 #include "hw/pci/msix.h"
28 #include "sysemu/kvm.h"
29 #include "migration/blocker.h"
30 #include "migration/vmstate.h"
31 #include "qemu/error-report.h"
32 #include "qemu/event_notifier.h"
33 #include "qemu/module.h"
34 #include "qom/object_interfaces.h"
35 #include "chardev/char-fe.h"
36 #include "sysemu/hostmem.h"
37 #include "sysemu/qtest.h"
38 #include "qapi/visitor.h"
39 
40 #include "hw/misc/ivshmem.h"
41 
/* PCI identity installed by ivshmem_common_class_init() */
#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

/* Largest peer ID accepted in a server message (see process_msg()) */
#define IVSHMEM_MAX_PEERS UINT16_MAX
/* Bit positions within IVShmemState.features; see ivshmem_has_feature() */
#define IVSHMEM_IOEVENTFD   0
#define IVSHMEM_MSI     1

/* Size in bytes of the register region registered as BAR 0 */
#define IVSHMEM_REG_BAR_SIZE 0x100
50 
/*
 * Debug tracing.  Set IVSHMEM_DEBUG to 1 to have IVSHMEM_DPRINTF() print to
 * stdout with an "IVSHMEM: " prefix.  The printf() call sits behind a plain
 * `if`, so the format string and arguments are still seen by the compiler
 * when tracing is off.
 */
#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)
58 
/*
 * QOM type names and the matching checked-cast macros.  Every flavour uses
 * the same instance struct, IVShmemState.
 */

/* Abstract base type shared by the concrete devices */
#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
74 
/* One entry of IVShmemState.peers[]; the array index is the peer ID */
typedef struct Peer {
    /* number of notifiers received so far for this peer */
    int nb_eventfds;
    /* notifier array, allocated in resize_peers() with one slot per vector */
    EventNotifier *eventfds;
} Peer;
79 
/* Per-vector interrupt bookkeeping; one entry of IVShmemState.msi_vectors[] */
typedef struct MSIVector {
    /* owning device; NULL while the vector has no handler or KVM route */
    PCIDevice *pdev;
    /* value returned by kvm_irqchip_add_msi_route() for this vector */
    int virq;
    /* set by ivshmem_vector_unmask(), cleared by ivshmem_vector_mask() */
    bool unmasked;
} MSIVector;
85 
/* Instance state shared by all ivshmem device flavours */
typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    /* bitmap of IVSHMEM_IOEVENTFD / IVSHMEM_MSI, see ivshmem_has_feature() */
    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* without interrupts */
    CharBackend server_chr; /* with interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;                  /* our own peer ID, read via IVPOSITION */

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
} IVShmemState;
119 
/*
 * registers for the Inter-VM shared memory device
 *
 * Values are byte offsets into the BAR 0 register region, as decoded by
 * ivshmem_io_read() and ivshmem_io_write().
 */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};
127 
128 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
129                                                     unsigned int feature) {
130     return (ivs->features & (1 << feature));
131 }
132 
133 static inline bool ivshmem_is_master(IVShmemState *s)
134 {
135     assert(s->master != ON_OFF_AUTO_AUTO);
136     return s->master == ON_OFF_AUTO_ON;
137 }
138 
139 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
140 {
141     IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
142 
143     s->intrmask = val;
144 }
145 
146 static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
147 {
148     uint32_t ret = s->intrmask;
149 
150     IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
151     return ret;
152 }
153 
154 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
155 {
156     IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
157 
158     s->intrstatus = val;
159 }
160 
161 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
162 {
163     uint32_t ret = s->intrstatus;
164 
165     /* reading ISR clears all interrupts */
166     s->intrstatus = 0;
167     return ret;
168 }
169 
170 static void ivshmem_io_write(void *opaque, hwaddr addr,
171                              uint64_t val, unsigned size)
172 {
173     IVShmemState *s = opaque;
174 
175     uint16_t dest = val >> 16;
176     uint16_t vector = val & 0xff;
177 
178     addr &= 0xfc;
179 
180     IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
181     switch (addr)
182     {
183         case INTRMASK:
184             ivshmem_IntrMask_write(s, val);
185             break;
186 
187         case INTRSTATUS:
188             ivshmem_IntrStatus_write(s, val);
189             break;
190 
191         case DOORBELL:
192             /* check that dest VM ID is reasonable */
193             if (dest >= s->nb_peers) {
194                 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
195                 break;
196             }
197 
198             /* check doorbell range */
199             if (vector < s->peers[dest].nb_eventfds) {
200                 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
201                 event_notifier_set(&s->peers[dest].eventfds[vector]);
202             } else {
203                 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
204                                 vector, dest);
205             }
206             break;
207         default:
208             IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
209     }
210 }
211 
212 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
213                                 unsigned size)
214 {
215 
216     IVShmemState *s = opaque;
217     uint32_t ret;
218 
219     switch (addr)
220     {
221         case INTRMASK:
222             ret = ivshmem_IntrMask_read(s);
223             break;
224 
225         case INTRSTATUS:
226             ret = ivshmem_IntrStatus_read(s);
227             break;
228 
229         case IVPOSITION:
230             ret = s->vm_id;
231             break;
232 
233         default:
234             IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
235             ret = 0;
236     }
237 
238     return ret;
239 }
240 
/*
 * Access callbacks for the register BAR (installed by
 * ivshmem_common_realize()).  The handlers are implemented for 4-byte
 * accesses only, as declared in .impl.
 */
static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
250 
251 static void ivshmem_vector_notify(void *opaque)
252 {
253     MSIVector *entry = opaque;
254     PCIDevice *pdev = entry->pdev;
255     IVShmemState *s = IVSHMEM_COMMON(pdev);
256     int vector = entry - s->msi_vectors;
257     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
258 
259     if (!event_notifier_test_and_clear(n)) {
260         return;
261     }
262 
263     IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
264     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
265         if (msix_enabled(pdev)) {
266             msix_notify(pdev, vector);
267         }
268     } else {
269         ivshmem_IntrStatus_write(s, 1);
270     }
271 }
272 
273 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
274                                  MSIMessage msg)
275 {
276     IVShmemState *s = IVSHMEM_COMMON(dev);
277     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
278     MSIVector *v = &s->msi_vectors[vector];
279     int ret;
280 
281     IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
282     if (!v->pdev) {
283         error_report("ivshmem: vector %d route does not exist", vector);
284         return -EINVAL;
285     }
286     assert(!v->unmasked);
287 
288     ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
289     if (ret < 0) {
290         return ret;
291     }
292     kvm_irqchip_commit_routes(kvm_state);
293 
294     ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
295     if (ret < 0) {
296         return ret;
297     }
298     v->unmasked = true;
299 
300     return 0;
301 }
302 
303 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
304 {
305     IVShmemState *s = IVSHMEM_COMMON(dev);
306     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
307     MSIVector *v = &s->msi_vectors[vector];
308     int ret;
309 
310     IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
311     if (!v->pdev) {
312         error_report("ivshmem: vector %d route does not exist", vector);
313         return;
314     }
315     assert(v->unmasked);
316 
317     ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
318     if (ret < 0) {
319         error_report("remove_irqfd_notifier_gsi failed");
320         return;
321     }
322     v->unmasked = false;
323 }
324 
325 static void ivshmem_vector_poll(PCIDevice *dev,
326                                 unsigned int vector_start,
327                                 unsigned int vector_end)
328 {
329     IVShmemState *s = IVSHMEM_COMMON(dev);
330     unsigned int vector;
331 
332     IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
333 
334     vector_end = MIN(vector_end, s->vectors);
335 
336     for (vector = vector_start; vector < vector_end; vector++) {
337         EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
338 
339         if (!msix_is_masked(dev, vector)) {
340             continue;
341         }
342 
343         if (event_notifier_test_and_clear(notifier)) {
344             msix_set_pending(dev, vector);
345         }
346     }
347 }
348 
349 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
350                                  int vector)
351 {
352     int eventfd = event_notifier_get_fd(n);
353 
354     assert(!s->msi_vectors[vector].pdev);
355     s->msi_vectors[vector].pdev = PCI_DEVICE(s);
356 
357     qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
358                         NULL, &s->msi_vectors[vector]);
359 }
360 
361 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
362 {
363     memory_region_add_eventfd(&s->ivshmem_mmio,
364                               DOORBELL,
365                               4,
366                               true,
367                               (posn << 16) | i,
368                               &s->peers[posn].eventfds[i]);
369 }
370 
371 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
372 {
373     memory_region_del_eventfd(&s->ivshmem_mmio,
374                               DOORBELL,
375                               4,
376                               true,
377                               (posn << 16) | i,
378                               &s->peers[posn].eventfds[i]);
379 }
380 
381 static void close_peer_eventfds(IVShmemState *s, int posn)
382 {
383     int i, n;
384 
385     assert(posn >= 0 && posn < s->nb_peers);
386     n = s->peers[posn].nb_eventfds;
387 
388     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
389         memory_region_transaction_begin();
390         for (i = 0; i < n; i++) {
391             ivshmem_del_eventfd(s, posn, i);
392         }
393         memory_region_transaction_commit();
394     }
395 
396     for (i = 0; i < n; i++) {
397         event_notifier_cleanup(&s->peers[posn].eventfds[i]);
398     }
399 
400     g_free(s->peers[posn].eventfds);
401     s->peers[posn].nb_eventfds = 0;
402 }
403 
404 static void resize_peers(IVShmemState *s, int nb_peers)
405 {
406     int old_nb_peers = s->nb_peers;
407     int i;
408 
409     assert(nb_peers > old_nb_peers);
410     IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
411 
412     s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
413     s->nb_peers = nb_peers;
414 
415     for (i = old_nb_peers; i < nb_peers; i++) {
416         s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
417         s->peers[i].nb_eventfds = 0;
418     }
419 }
420 
421 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
422                                      Error **errp)
423 {
424     PCIDevice *pdev = PCI_DEVICE(s);
425     int ret;
426 
427     IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
428     assert(!s->msi_vectors[vector].pdev);
429 
430     ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
431     if (ret < 0) {
432         error_setg(errp, "kvm_irqchip_add_msi_route failed");
433         return;
434     }
435 
436     s->msi_vectors[vector].virq = ret;
437     s->msi_vectors[vector].pdev = pdev;
438 }
439 
440 static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
441 {
442     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
443     bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
444         ivshmem_has_feature(s, IVSHMEM_MSI);
445     PCIDevice *pdev = PCI_DEVICE(s);
446     Error *err = NULL;
447 
448     IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
449 
450     if (!with_irqfd) {
451         IVSHMEM_DPRINTF("with eventfd\n");
452         watch_vector_notifier(s, n, vector);
453     } else if (msix_enabled(pdev)) {
454         IVSHMEM_DPRINTF("with irqfd\n");
455         ivshmem_add_kvm_msi_virq(s, vector, &err);
456         if (err) {
457             error_propagate(errp, err);
458             return;
459         }
460 
461         if (!msix_is_masked(pdev, vector)) {
462             kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
463                                                s->msi_vectors[vector].virq);
464             /* TODO handle error */
465         }
466     } else {
467         /* it will be delayed until msix is enabled, in write_config */
468         IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
469     }
470 }
471 
472 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
473 {
474     Error *local_err = NULL;
475     struct stat buf;
476     size_t size;
477 
478     if (s->ivshmem_bar2) {
479         error_setg(errp, "server sent unexpected shared memory message");
480         close(fd);
481         return;
482     }
483 
484     if (fstat(fd, &buf) < 0) {
485         error_setg_errno(errp, errno,
486             "can't determine size of shared memory sent by server");
487         close(fd);
488         return;
489     }
490 
491     size = buf.st_size;
492 
493     /* mmap the region and map into the BAR2 */
494     memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
495                                    "ivshmem.bar2", size, true, fd, &local_err);
496     if (local_err) {
497         error_propagate(errp, local_err);
498         return;
499     }
500 
501     s->ivshmem_bar2 = &s->server_bar2;
502 }
503 
504 static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
505                                    Error **errp)
506 {
507     IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
508     if (posn >= s->nb_peers || posn == s->vm_id) {
509         error_setg(errp, "invalid peer %d", posn);
510         return;
511     }
512     close_peer_eventfds(s, posn);
513 }
514 
515 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
516                                 Error **errp)
517 {
518     Peer *peer = &s->peers[posn];
519     int vector;
520 
521     /*
522      * The N-th connect message for this peer comes with the file
523      * descriptor for vector N-1.  Count messages to find the vector.
524      */
525     if (peer->nb_eventfds >= s->vectors) {
526         error_setg(errp, "Too many eventfd received, device has %d vectors",
527                    s->vectors);
528         close(fd);
529         return;
530     }
531     vector = peer->nb_eventfds++;
532 
533     IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
534     event_notifier_init_fd(&peer->eventfds[vector], fd);
535     fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
536 
537     if (posn == s->vm_id) {
538         setup_interrupt(s, vector, errp);
539         /* TODO do we need to handle the error? */
540     }
541 
542     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
543         ivshmem_add_eventfd(s, posn, vector);
544     }
545 }
546 
547 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
548 {
549     IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
550 
551     if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
552         error_setg(errp, "server sent invalid message %" PRId64, msg);
553         close(fd);
554         return;
555     }
556 
557     if (msg == -1) {
558         process_msg_shmem(s, fd, errp);
559         return;
560     }
561 
562     if (msg >= s->nb_peers) {
563         resize_peers(s, msg + 1);
564     }
565 
566     if (fd >= 0) {
567         process_msg_connect(s, msg, fd, errp);
568     } else {
569         process_msg_disconnect(s, msg, errp);
570     }
571 }
572 
573 static int ivshmem_can_receive(void *opaque)
574 {
575     IVShmemState *s = opaque;
576 
577     assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
578     return sizeof(s->msg_buf) - s->msg_buffered_bytes;
579 }
580 
581 static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
582 {
583     IVShmemState *s = opaque;
584     Error *err = NULL;
585     int fd;
586     int64_t msg;
587 
588     assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
589     memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
590     s->msg_buffered_bytes += size;
591     if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
592         return;
593     }
594     msg = le64_to_cpu(s->msg_buf);
595     s->msg_buffered_bytes = 0;
596 
597     fd = qemu_chr_fe_get_msgfd(&s->server_chr);
598 
599     process_msg(s, msg, fd, &err);
600     if (err) {
601         error_report_err(err);
602     }
603 }
604 
605 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
606 {
607     int64_t msg;
608     int n, ret;
609 
610     n = 0;
611     do {
612         ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
613                                    sizeof(msg) - n);
614         if (ret < 0) {
615             if (ret == -EINTR) {
616                 continue;
617             }
618             error_setg_errno(errp, -ret, "read from server failed");
619             return INT64_MIN;
620         }
621         n += ret;
622     } while (n < sizeof(msg));
623 
624     *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
625     return le64_to_cpu(msg);
626 }
627 
628 static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
629 {
630     Error *err = NULL;
631     int64_t msg;
632     int fd;
633 
634     msg = ivshmem_recv_msg(s, &fd, &err);
635     if (err) {
636         error_propagate(errp, err);
637         return;
638     }
639     if (msg != IVSHMEM_PROTOCOL_VERSION) {
640         error_setg(errp, "server sent version %" PRId64 ", expecting %d",
641                    msg, IVSHMEM_PROTOCOL_VERSION);
642         return;
643     }
644     if (fd != -1) {
645         error_setg(errp, "server sent invalid version message");
646         return;
647     }
648 
649     /*
650      * ivshmem-server sends the remaining initial messages in a fixed
651      * order, but the device has always accepted them in any order.
652      * Stay as compatible as practical, just in case people use
653      * servers that behave differently.
654      */
655 
656     /*
657      * ivshmem_device_spec.txt has always required the ID message
658      * right here, and ivshmem-server has always complied.  However,
659      * older versions of the device accepted it out of order, but
660      * broke when an interrupt setup message arrived before it.
661      */
662     msg = ivshmem_recv_msg(s, &fd, &err);
663     if (err) {
664         error_propagate(errp, err);
665         return;
666     }
667     if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
668         error_setg(errp, "server sent invalid ID message");
669         return;
670     }
671     s->vm_id = msg;
672 
673     /*
674      * Receive more messages until we got shared memory.
675      */
676     do {
677         msg = ivshmem_recv_msg(s, &fd, &err);
678         if (err) {
679             error_propagate(errp, err);
680             return;
681         }
682         process_msg(s, msg, fd, &err);
683         if (err) {
684             error_propagate(errp, err);
685             return;
686         }
687     } while (msg != -1);
688 
689     /*
690      * This function must either map the shared memory or fail.  The
691      * loop above ensures that: it terminates normally only after it
692      * successfully processed the server's shared memory message.
693      * Assert that actually mapped the shared memory:
694      */
695     assert(s->ivshmem_bar2);
696 }
697 
698 /* Select the MSI-X vectors used by device.
699  * ivshmem maps events to vectors statically, so
700  * we just enable all vectors on init and after reset. */
701 static void ivshmem_msix_vector_use(IVShmemState *s)
702 {
703     PCIDevice *d = PCI_DEVICE(s);
704     int i;
705 
706     for (i = 0; i < s->vectors; i++) {
707         msix_vector_use(d, i);
708     }
709 }
710 
711 static void ivshmem_disable_irqfd(IVShmemState *s);
712 
713 static void ivshmem_reset(DeviceState *d)
714 {
715     IVShmemState *s = IVSHMEM_COMMON(d);
716 
717     ivshmem_disable_irqfd(s);
718 
719     s->intrstatus = 0;
720     s->intrmask = 0;
721     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
722         ivshmem_msix_vector_use(s);
723     }
724 }
725 
726 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
727 {
728     /* allocate QEMU callback data for receiving interrupts */
729     s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
730 
731     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
732         if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
733             return -1;
734         }
735 
736         IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
737         ivshmem_msix_vector_use(s);
738     }
739 
740     return 0;
741 }
742 
743 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
744 {
745     IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
746 
747     if (s->msi_vectors[vector].pdev == NULL) {
748         return;
749     }
750 
751     /* it was cleaned when masked in the frontend. */
752     kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
753 
754     s->msi_vectors[vector].pdev = NULL;
755 }
756 
757 static void ivshmem_enable_irqfd(IVShmemState *s)
758 {
759     PCIDevice *pdev = PCI_DEVICE(s);
760     int i;
761 
762     for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
763         Error *err = NULL;
764 
765         ivshmem_add_kvm_msi_virq(s, i, &err);
766         if (err) {
767             error_report_err(err);
768             goto undo;
769         }
770     }
771 
772     if (msix_set_vector_notifiers(pdev,
773                                   ivshmem_vector_unmask,
774                                   ivshmem_vector_mask,
775                                   ivshmem_vector_poll)) {
776         error_report("ivshmem: msix_set_vector_notifiers failed");
777         goto undo;
778     }
779     return;
780 
781 undo:
782     while (--i >= 0) {
783         ivshmem_remove_kvm_msi_virq(s, i);
784     }
785 }
786 
787 static void ivshmem_disable_irqfd(IVShmemState *s)
788 {
789     PCIDevice *pdev = PCI_DEVICE(s);
790     int i;
791 
792     if (!pdev->msix_vector_use_notifier) {
793         return;
794     }
795 
796     msix_unset_vector_notifiers(pdev);
797 
798     for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
799         /*
800          * MSI-X is already disabled here so msix_unset_vector_notifiers()
801          * didn't call our release notifier.  Do it now to keep our masks and
802          * unmasks balanced.
803          */
804         if (s->msi_vectors[i].unmasked) {
805             ivshmem_vector_mask(pdev, i);
806         }
807         ivshmem_remove_kvm_msi_virq(s, i);
808     }
809 
810 }
811 
812 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
813                                  uint32_t val, int len)
814 {
815     IVShmemState *s = IVSHMEM_COMMON(pdev);
816     int is_enabled, was_enabled = msix_enabled(pdev);
817 
818     pci_default_write_config(pdev, address, val, len);
819     is_enabled = msix_enabled(pdev);
820 
821     if (kvm_msi_via_irqfd_enabled()) {
822         if (!was_enabled && is_enabled) {
823             ivshmem_enable_irqfd(s);
824         } else if (was_enabled && !is_enabled) {
825             ivshmem_disable_irqfd(s);
826         }
827     }
828 }
829 
830 static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
831 {
832     IVShmemState *s = IVSHMEM_COMMON(dev);
833     Error *err = NULL;
834     uint8_t *pci_conf;
835 
836     /* IRQFD requires MSI */
837     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
838         !ivshmem_has_feature(s, IVSHMEM_MSI)) {
839         error_setg(errp, "ioeventfd/irqfd requires MSI");
840         return;
841     }
842 
843     pci_conf = dev->config;
844     pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
845 
846     memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
847                           "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
848 
849     /* region for registers*/
850     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
851                      &s->ivshmem_mmio);
852 
853     if (s->hostmem != NULL) {
854         IVSHMEM_DPRINTF("using hostmem\n");
855 
856         s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
857         host_memory_backend_set_mapped(s->hostmem, true);
858     } else {
859         Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
860         assert(chr);
861 
862         IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
863                         chr->filename);
864 
865         /* we allocate enough space for 16 peers and grow as needed */
866         resize_peers(s, 16);
867 
868         /*
869          * Receive setup messages from server synchronously.
870          * Older versions did it asynchronously, but that creates a
871          * number of entertaining race conditions.
872          */
873         ivshmem_recv_setup(s, &err);
874         if (err) {
875             error_propagate(errp, err);
876             return;
877         }
878 
879         if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
880             error_setg(errp,
881                        "master must connect to the server before any peers");
882             return;
883         }
884 
885         qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
886                                  ivshmem_read, NULL, NULL, s, NULL, true);
887 
888         if (ivshmem_setup_interrupts(s, errp) < 0) {
889             error_prepend(errp, "Failed to initialize interrupts: ");
890             return;
891         }
892     }
893 
894     if (s->master == ON_OFF_AUTO_AUTO) {
895         s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
896     }
897 
898     if (!ivshmem_is_master(s)) {
899         error_setg(&s->migration_blocker,
900                    "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
901         migrate_add_blocker(s->migration_blocker, &err);
902         if (err) {
903             error_propagate(errp, err);
904             error_free(s->migration_blocker);
905             return;
906         }
907     }
908 
909     vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
910     pci_register_bar(PCI_DEVICE(s), 2,
911                      PCI_BASE_ADDRESS_SPACE_MEMORY |
912                      PCI_BASE_ADDRESS_MEM_PREFETCH |
913                      PCI_BASE_ADDRESS_MEM_TYPE_64,
914                      s->ivshmem_bar2);
915 }
916 
917 static void ivshmem_exit(PCIDevice *dev)
918 {
919     IVShmemState *s = IVSHMEM_COMMON(dev);
920     int i;
921 
922     if (s->migration_blocker) {
923         migrate_del_blocker(s->migration_blocker);
924         error_free(s->migration_blocker);
925     }
926 
927     if (memory_region_is_mapped(s->ivshmem_bar2)) {
928         if (!s->hostmem) {
929             void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
930             int fd;
931 
932             if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
933                 error_report("Failed to munmap shared memory %s",
934                              strerror(errno));
935             }
936 
937             fd = memory_region_get_fd(s->ivshmem_bar2);
938             close(fd);
939         }
940 
941         vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
942     }
943 
944     if (s->hostmem) {
945         host_memory_backend_set_mapped(s->hostmem, false);
946     }
947 
948     if (s->peers) {
949         for (i = 0; i < s->nb_peers; i++) {
950             close_peer_eventfds(s, i);
951         }
952         g_free(s->peers);
953     }
954 
955     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
956         msix_uninit_exclusive_bar(dev);
957     }
958 
959     g_free(s->msi_vectors);
960 }
961 
962 static int ivshmem_pre_load(void *opaque)
963 {
964     IVShmemState *s = opaque;
965 
966     if (!ivshmem_is_master(s)) {
967         error_report("'peer' devices are not migratable");
968         return -EINVAL;
969     }
970 
971     return 0;
972 }
973 
974 static int ivshmem_post_load(void *opaque, int version_id)
975 {
976     IVShmemState *s = opaque;
977 
978     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
979         ivshmem_msix_vector_use(s);
980     }
981     return 0;
982 }
983 
984 static void ivshmem_common_class_init(ObjectClass *klass, void *data)
985 {
986     DeviceClass *dc = DEVICE_CLASS(klass);
987     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
988 
989     k->realize = ivshmem_common_realize;
990     k->exit = ivshmem_exit;
991     k->config_write = ivshmem_write_config;
992     k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
993     k->device_id = PCI_DEVICE_ID_IVSHMEM;
994     k->class_id = PCI_CLASS_MEMORY_RAM;
995     k->revision = 1;
996     dc->reset = ivshmem_reset;
997     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
998     dc->desc = "Inter-VM shared memory";
999 }
1000 
/*
 * QOM type description of the abstract base type.  It cannot be
 * instantiated itself (.abstract); the concrete flavours name it as their
 * parent.
 */
static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1012 
/*
 * Migration description for ivshmem-plain: the PCI device state plus the
 * interrupt status and mask registers.  ivshmem_pre_load() refuses to load
 * into a non-master device.
 */
static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};
1026 
/* User-settable properties of the ivshmem-plain device. */
static Property ivshmem_plain_properties[] = {
    /* "master": on/off/auto value stored in IVShmemState.master, default off */
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    /* "memdev": link to a TYPE_MEMORY_BACKEND object, stored in hostmem */
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};
1033 
1034 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1035 {
1036     IVShmemState *s = IVSHMEM_COMMON(dev);
1037 
1038     if (!s->hostmem) {
1039         error_setg(errp, "You must specify a 'memdev'");
1040         return;
1041     } else if (host_memory_backend_is_mapped(s->hostmem)) {
1042         char *path = object_get_canonical_path_component(OBJECT(s->hostmem));
1043         error_setg(errp, "can't use already busy memdev: %s", path);
1044         g_free(path);
1045         return;
1046     }
1047 
1048     ivshmem_common_realize(dev, errp);
1049 }
1050 
1051 static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1052 {
1053     DeviceClass *dc = DEVICE_CLASS(klass);
1054     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1055 
1056     k->realize = ivshmem_plain_realize;
1057     device_class_set_props(dc, ivshmem_plain_properties);
1058     dc->vmsd = &ivshmem_plain_vmsd;
1059 }
1060 
1061 static const TypeInfo ivshmem_plain_info = {
1062     .name          = TYPE_IVSHMEM_PLAIN,
1063     .parent        = TYPE_IVSHMEM_COMMON,
1064     .instance_size = sizeof(IVShmemState),
1065     .class_init    = ivshmem_plain_class_init,
1066 };
1067 
1068 static const VMStateDescription ivshmem_doorbell_vmsd = {
1069     .name = TYPE_IVSHMEM_DOORBELL,
1070     .version_id = 0,
1071     .minimum_version_id = 0,
1072     .pre_load = ivshmem_pre_load,
1073     .post_load = ivshmem_post_load,
1074     .fields = (VMStateField[]) {
1075         VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1076         VMSTATE_MSIX(parent_obj, IVShmemState),
1077         VMSTATE_UINT32(intrstatus, IVShmemState),
1078         VMSTATE_UINT32(intrmask, IVShmemState),
1079         VMSTATE_END_OF_LIST()
1080     },
1081 };
1082 
/* User-settable properties of the ivshmem-doorbell device. */
static Property ivshmem_doorbell_properties[] = {
    /* "chardev": character device backend, stored in server_chr */
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    /* "vectors": 32-bit unsigned value stored in vectors, default 1 */
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    /* "ioeventfd": bit IVSHMEM_IOEVENTFD of features, set by default */
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    /* "master": on/off/auto value stored in IVShmemState.master, default off */
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};
1091 
1092 static void ivshmem_doorbell_init(Object *obj)
1093 {
1094     IVShmemState *s = IVSHMEM_DOORBELL(obj);
1095 
1096     s->features |= (1 << IVSHMEM_MSI);
1097 }
1098 
1099 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1100 {
1101     IVShmemState *s = IVSHMEM_COMMON(dev);
1102 
1103     if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1104         error_setg(errp, "You must specify a 'chardev'");
1105         return;
1106     }
1107 
1108     ivshmem_common_realize(dev, errp);
1109 }
1110 
1111 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1112 {
1113     DeviceClass *dc = DEVICE_CLASS(klass);
1114     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1115 
1116     k->realize = ivshmem_doorbell_realize;
1117     device_class_set_props(dc, ivshmem_doorbell_properties);
1118     dc->vmsd = &ivshmem_doorbell_vmsd;
1119 }
1120 
1121 static const TypeInfo ivshmem_doorbell_info = {
1122     .name          = TYPE_IVSHMEM_DOORBELL,
1123     .parent        = TYPE_IVSHMEM_COMMON,
1124     .instance_size = sizeof(IVShmemState),
1125     .instance_init = ivshmem_doorbell_init,
1126     .class_init    = ivshmem_doorbell_class_init,
1127 };
1128 
/*
 * Register the ivshmem types: the common base type first, then the
 * ivshmem-plain and ivshmem-doorbell types that name it as their parent.
 */
static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

/* Hand the registration function to the type_init() mechanism. */
type_init(ivshmem_register_types)
1137