xref: /openbmc/qemu/hw/i386/kvm/xen_evtchn.c (revision 0b29090a)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "monitor/monitor.h"
19 #include "monitor/hmp.h"
20 #include "qapi/error.h"
21 #include "qapi/qapi-commands-misc-target.h"
22 #include "qapi/qmp/qdict.h"
23 #include "qom/object.h"
24 #include "exec/target_page.h"
25 #include "exec/address-spaces.h"
26 #include "migration/vmstate.h"
27 #include "trace.h"
28 
29 #include "hw/sysbus.h"
30 #include "hw/xen/xen.h"
31 #include "hw/i386/x86.h"
32 #include "hw/i386/pc.h"
33 #include "hw/pci/pci.h"
34 #include "hw/pci/msi.h"
35 #include "hw/pci/msix.h"
36 #include "hw/irq.h"
37 #include "hw/xen/xen_backend_ops.h"
38 
39 #include "xen_evtchn.h"
40 #include "xen_overlay.h"
41 #include "xen_xenstore.h"
42 
43 #include "sysemu/kvm.h"
44 #include "sysemu/kvm_xen.h"
45 #include <linux/kvm.h>
46 #include <sys/eventfd.h>
47 
48 #include "hw/xen/interface/memory.h"
49 #include "hw/xen/interface/hvm/params.h"
50 
51 /* XX: For kvm_update_msi_routes_all() */
52 #include "target/i386/kvm/kvm_i386.h"
53 
54 #define TYPE_XEN_EVTCHN "xen-evtchn"
55 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
56 
57 typedef struct XenEvtchnPort {
58     uint32_t vcpu;      /* Xen/ACPI vcpu_id */
59     uint16_t type;      /* EVTCHNSTAT_xxxx */
60     uint16_t type_val;  /* pirq# / virq# / remote port according to type */
61 } XenEvtchnPort;
62 
63 /* 32-bit compatibility definitions, also used natively in 32-bit build */
64 struct compat_arch_vcpu_info {
65     unsigned int cr2;
66     unsigned int pad[5];
67 };
68 
69 struct compat_vcpu_info {
70     uint8_t evtchn_upcall_pending;
71     uint8_t evtchn_upcall_mask;
72     uint16_t pad;
73     uint32_t evtchn_pending_sel;
74     struct compat_arch_vcpu_info arch;
75     struct vcpu_time_info time;
76 }; /* 64 bytes (x86) */
77 
78 struct compat_arch_shared_info {
79     unsigned int max_pfn;
80     unsigned int pfn_to_mfn_frame_list_list;
81     unsigned int nmi_reason;
82     unsigned int p2m_cr3;
83     unsigned int p2m_vaddr;
84     unsigned int p2m_generation;
85     uint32_t wc_sec_hi;
86 };
87 
88 struct compat_shared_info {
89     struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
90     uint32_t evtchn_pending[32];
91     uint32_t evtchn_mask[32];
92     uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
93     uint32_t wc_sec;
94     uint32_t wc_nsec;
95     struct compat_arch_shared_info arch;
96 };
97 
98 #define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
99 
100 /* Local private implementation of struct xenevtchn_handle */
101 struct xenevtchn_handle {
102     evtchn_port_t be_port;
103     evtchn_port_t guest_port; /* Or zero for unbound */
104     int fd;
105 };
106 
107 /*
108  * For unbound/interdomain ports there are only two possible remote
109  * domains: self and QEMU. Use a single high bit in type_val for that,
110  * and the low bits for the remote port number (or 0 for unbound).
111  */
112 #define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
113 #define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
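
/*
 * For example, an interdomain port bound to QEMU backend port 5 stores
 * type_val = (PORT_INFO_TYPEVAL_REMOTE_QEMU | 5) == 0x8005, while a loopback
 * interdomain port bound to guest port 5 stores just 0x0005.
 */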
114 
115 /*
116  * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
117  * insane enough to think about guest-transparent live migration from actual
118  * Xen to QEMU, and ensuring that we can convert/consume the stream.
119  */
120 #define IRQ_UNBOUND -1
121 #define IRQ_PT -2
122 #define IRQ_MSI_EMU -3
123 
124 
125 struct pirq_info {
126     int gsi;
127     uint16_t port;
128     PCIDevice *dev;
129     int vector;
130     bool is_msix;
131     bool is_masked;
132     bool is_translated;
133 };
134 
135 struct XenEvtchnState {
136     /*< private >*/
137     SysBusDevice busdev;
138     /*< public >*/
139 
140     uint64_t callback_param;
141     bool evtchn_in_kernel;
142     uint32_t callback_gsi;
143 
144     QEMUBH *gsi_bh;
145 
146     QemuMutex port_lock;
147     uint32_t nr_ports;
148     XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
149     qemu_irq gsis[IOAPIC_NUM_PINS];
150 
151     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
152 
153     uint32_t nr_pirqs;
154 
155     /* Bitmap of allocated PIRQs (serialized) */
156     uint16_t nr_pirq_inuse_words;
157     uint64_t *pirq_inuse_bitmap;
158 
159     /* GSI → PIRQ mapping (serialized) */
160     uint16_t gsi_pirq[IOAPIC_NUM_PINS];
161 
162     /* Per-GSI assertion state (serialized) */
163     uint32_t pirq_gsi_set;
164 
165     /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
166     struct pirq_info *pirq;
167 };
168 
169 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
170 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
171 
172 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
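
/*
 * For example, PIRQ 130 is tracked in pirq_inuse_bitmap[2] (130 / 64 == 2)
 * under the bit mask 1ULL << (130 & 63) == 1ULL << 2.
 */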
173 
174 struct XenEvtchnState *xen_evtchn_singleton;
175 
176 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
177 #define CALLBACK_VIA_TYPE_SHIFT 56
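
/*
 * e.g. a param of ((uint64_t)HVM_PARAM_CALLBACK_TYPE_GSI << 56) | 9 requests
 * delivery via GSI 9: xen_evtchn_set_callback_param() extracts the type with
 * (param >> CALLBACK_VIA_TYPE_SHIFT) and the GSI from the low bits.
 */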
178 
179 static void unbind_backend_ports(XenEvtchnState *s);
180 
181 static int xen_evtchn_pre_load(void *opaque)
182 {
183     XenEvtchnState *s = opaque;
184 
185     /* Unbind all the backend-side ports; they need to rebind */
186     unbind_backend_ports(s);
187 
188     /* It'll be leaked otherwise. */
189     g_free(s->pirq_inuse_bitmap);
190     s->pirq_inuse_bitmap = NULL;
191 
192     return 0;
193 }
194 
195 static int xen_evtchn_post_load(void *opaque, int version_id)
196 {
197     XenEvtchnState *s = opaque;
198     uint32_t i;
199 
200     if (s->callback_param) {
201         xen_evtchn_set_callback_param(s->callback_param);
202     }
203 
204     /* Rebuild s->pirq[].port mapping */
205     for (i = 0; i < s->nr_ports; i++) {
206         XenEvtchnPort *p = &s->port_table[i];
207 
208         if (p->type == EVTCHNSTAT_pirq) {
209             assert(p->type_val);
210             assert(p->type_val < s->nr_pirqs);
211 
212             /*
213              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
214              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
215              * catches up with it.
216              */
217             s->pirq[p->type_val].gsi = IRQ_UNBOUND;
218             s->pirq[p->type_val].port = i;
219         }
220     }
221     /* Rebuild s->pirq[].gsi mapping */
222     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
223         if (s->gsi_pirq[i]) {
224             s->pirq[s->gsi_pirq[i]].gsi = i;
225         }
226     }
227     return 0;
228 }
229 
230 static bool xen_evtchn_is_needed(void *opaque)
231 {
232     return xen_mode == XEN_EMULATE;
233 }
234 
235 static const VMStateDescription xen_evtchn_port_vmstate = {
236     .name = "xen_evtchn_port",
237     .version_id = 1,
238     .minimum_version_id = 1,
239     .fields = (VMStateField[]) {
240         VMSTATE_UINT32(vcpu, XenEvtchnPort),
241         VMSTATE_UINT16(type, XenEvtchnPort),
242         VMSTATE_UINT16(type_val, XenEvtchnPort),
243         VMSTATE_END_OF_LIST()
244     }
245 };
246 
247 static const VMStateDescription xen_evtchn_vmstate = {
248     .name = "xen_evtchn",
249     .version_id = 1,
250     .minimum_version_id = 1,
251     .needed = xen_evtchn_is_needed,
252     .pre_load = xen_evtchn_pre_load,
253     .post_load = xen_evtchn_post_load,
254     .fields = (VMStateField[]) {
255         VMSTATE_UINT64(callback_param, XenEvtchnState),
256         VMSTATE_UINT32(nr_ports, XenEvtchnState),
257         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
258                                      xen_evtchn_port_vmstate, XenEvtchnPort),
259         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
260         VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
261                                     nr_pirq_inuse_words, 0,
262                                     vmstate_info_uint64, uint64_t),
263         VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
264         VMSTATE_END_OF_LIST()
265     }
266 };
267 
268 static void xen_evtchn_class_init(ObjectClass *klass, void *data)
269 {
270     DeviceClass *dc = DEVICE_CLASS(klass);
271 
272     dc->vmsd = &xen_evtchn_vmstate;
273 }
274 
275 static const TypeInfo xen_evtchn_info = {
276     .name          = TYPE_XEN_EVTCHN,
277     .parent        = TYPE_SYS_BUS_DEVICE,
278     .instance_size = sizeof(XenEvtchnState),
279     .class_init    = xen_evtchn_class_init,
280 };
281 
282 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
283     .open = xen_be_evtchn_open,
284     .bind_interdomain = xen_be_evtchn_bind_interdomain,
285     .unbind = xen_be_evtchn_unbind,
286     .close = xen_be_evtchn_close,
287     .get_fd = xen_be_evtchn_fd,
288     .notify = xen_be_evtchn_notify,
289     .unmask = xen_be_evtchn_unmask,
290     .pending = xen_be_evtchn_pending,
291 };
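
/*
 * PV backend drivers consume these through xen_evtchn_ops: typically
 * xen_be_evtchn_open() to obtain a handle, xen_be_evtchn_bind_interdomain()
 * to bind it to a guest port, notify/pending/unmask (and the handle's fd)
 * for the actual event traffic, and unbind/close to tear it down.
 */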
292 
293 static void gsi_assert_bh(void *opaque)
294 {
295     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
296     if (vi) {
297         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
298     }
299 }
300 
301 void xen_evtchn_create(void)
302 {
303     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
304                                                         -1, NULL));
305     int i;
306 
307     xen_evtchn_singleton = s;
308 
309     qemu_mutex_init(&s->port_lock);
310     s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
311 
312     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
313         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
314     }
315 
316     /*
317      * The Xen scheme for encoding PIRQ# into an MSI message is not
318      * compatible with 32-bit MSI, as it puts the high bits of the
319      * PIRQ# into the high bits of the MSI message address, instead of
320      * using the Extended Destination ID in address bits 4-11 which
321      * perhaps would have been a better choice.
322      *
323      * To keep life simple, kvm_accel_instance_init() initialises the
324      * default to 256, which conveniently doesn't need to set anything
325      * outside the low 32 bits of the address. It can be increased by
326      * setting the xen-evtchn-max-pirq property.
327      */
328     s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
329 
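    /* With the default of 256 PIRQs this is DIV_ROUND_UP(256, 64) == 4 words */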
330     s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
331     s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
332     s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
333 
334     /* Set event channel functions for backend drivers to use */
335     xen_evtchn_ops = &emu_evtchn_backend_ops;
336 }
337 
338 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
339 {
340     XenEvtchnState *s = xen_evtchn_singleton;
341     int i;
342 
343     if (!s) {
344         return;
345     }
346 
347     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
348         sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
349     }
350 }
351 
352 static void xen_evtchn_register_types(void)
353 {
354     type_register_static(&xen_evtchn_info);
355 }
356 
357 type_init(xen_evtchn_register_types)
358 
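/*
 * A HVM_PARAM_CALLBACK_TYPE_PCI_INTX param encodes its target in the low
 * bits: INTx pin in bits 0-1, devfn in bits 8-15, bus in bits 16-31 and the
 * PCI domain (only domain 0 is supported here) in bits 32-47, as decoded
 * below.
 */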
359 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
360 {
361     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
362     uint8_t pin = param & 3;
363     uint8_t devfn = (param >> 8) & 0xff;
364     uint16_t bus = (param >> 16) & 0xffff;
365     uint16_t domain = (param >> 32) & 0xffff;
366     PCIDevice *pdev;
367     PCIINTxRoute r;
368 
369     if (domain || !pcms) {
370         return 0;
371     }
372 
373     pdev = pci_find_device(pcms->bus, bus, devfn);
374     if (!pdev) {
375         return 0;
376     }
377 
378     r = pci_device_route_intx_to_irq(pdev, pin);
379     if (r.mode != PCI_INTX_ENABLED) {
380         return 0;
381     }
382 
383     /*
384      * Hm, can we be notified of INTX routing changes? Not without
385      * *owning* the device and being allowed to overwrite its own
386      * ->intx_routing_notifier, AFAICT. So let's not.
387      */
388     return r.irq;
389 }
390 
391 void xen_evtchn_set_callback_level(int level)
392 {
393     XenEvtchnState *s = xen_evtchn_singleton;
394     if (!s) {
395         return;
396     }
397 
398     /*
399      * We get to this function in a number of ways:
400      *
401      *  • From I/O context, via PV backend drivers sending a notification to
402      *    the guest.
403      *
404      *  • From guest vCPU context, via loopback interdomain event channels
405      *    (or theoretically even IPIs but guests don't use those with GSI
406      *    delivery because that's pointless. We don't want a malicious guest
407      *    to be able to trigger a deadlock though, so we can't rule it out.)
408      *
409      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
410      *    configured.
411      *
412      *  • From guest vCPU context in the KVM exit handler, if the upcall
413      *    pending flag has been cleared and the GSI needs to be deasserted.
414      *
415      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
416      *    been acked in the irqchip.
417      *
418      * Whichever context we come from, if we aren't already holding the BQL
419      * then we can't take it now, as we may already hold s->port_lock. So
420      * trigger the BH to set the IRQ for us instead of doing it immediately.
421      *
422      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
423      * will deliberately take the BQL because they want the change to take
424      * effect immediately. That just leaves interdomain loopback as the case
425      * which uses the BH.
426      */
427     if (!qemu_mutex_iothread_locked()) {
428         qemu_bh_schedule(s->gsi_bh);
429         return;
430     }
431 
432     if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) {
433         qemu_set_irq(s->gsis[s->callback_gsi], level);
434         if (level) {
435             /* Ensure the vCPU polls for deassertion */
436             kvm_xen_set_callback_asserted();
437         }
438     }
439 }
440 
441 int xen_evtchn_set_callback_param(uint64_t param)
442 {
443     XenEvtchnState *s = xen_evtchn_singleton;
444     struct kvm_xen_hvm_attr xa = {
445         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
446         .u.vector = 0,
447     };
448     bool in_kernel = false;
449     uint32_t gsi = 0;
450     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
451     int ret;
452 
453     if (!s) {
454         return -ENOTSUP;
455     }
456 
457     /*
458      * We need the BQL because set_callback_pci_intx() may call into PCI code,
459      * and because we may need to manipulate the old and new GSI levels.
460      */
461     assert(qemu_mutex_iothread_locked());
462     qemu_mutex_lock(&s->port_lock);
463 
464     switch (type) {
465     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
466         xa.u.vector = (uint8_t)param;
467 
468         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
469         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
470             in_kernel = true;
471         }
472         gsi = 0;
473         break;
474     }
475 
476     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
477         gsi = set_callback_pci_intx(s, param);
478         ret = gsi ? 0 : -EINVAL;
479         break;
480 
481     case HVM_PARAM_CALLBACK_TYPE_GSI:
482         gsi = (uint32_t)param;
483         ret = 0;
484         break;
485 
486     default:
487         /* Xen doesn't return an error even if you set something bogus */
488         ret = 0;
489         break;
490     }
491 
492     if (!ret) {
493         /* If vector delivery was turned *off* then tell the kernel */
494         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
495             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
496             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
497         }
498         s->callback_param = param;
499         s->evtchn_in_kernel = in_kernel;
500 
501         if (gsi != s->callback_gsi) {
502             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
503 
504             xen_evtchn_set_callback_level(0);
505             s->callback_gsi = gsi;
506 
507             if (gsi && vi && vi->evtchn_upcall_pending) {
508                 kvm_xen_inject_vcpu_callback_vector(0, type);
509             }
510         }
511     }
512 
513     qemu_mutex_unlock(&s->port_lock);
514 
515     return ret;
516 }
517 
518 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
519 {
520     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
521 
522     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
523 }
524 
525 static void deassign_kernel_port(evtchn_port_t port)
526 {
527     struct kvm_xen_hvm_attr ha;
528     int ret;
529 
530     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
531     ha.u.evtchn.send_port = port;
532     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
533 
534     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
535     if (ret) {
536         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
537                       port, strerror(-ret));
538     }
539 }
540 
541 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
542                               uint32_t vcpu_id)
543 {
544     CPUState *cpu = qemu_get_cpu(vcpu_id);
545     struct kvm_xen_hvm_attr ha;
546 
547     if (!cpu) {
548         return -ENOENT;
549     }
550 
551     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
552     ha.u.evtchn.send_port = port;
553     ha.u.evtchn.type = type;
554     ha.u.evtchn.flags = 0;
555     ha.u.evtchn.deliver.port.port = port;
556     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
557     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
558 
559     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
560 }
561 
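/*
 * Unlike assign_kernel_port(), which has the kernel deliver the event into a
 * guest vCPU's 2-level bitmap, this variant routes the guest's notification
 * to an eventfd so that QEMU, acting as the remote/backend domain, can pick
 * it up on the handle's file descriptor.
 */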
562 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
563 {
564     struct kvm_xen_hvm_attr ha;
565 
566     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
567     ha.u.evtchn.send_port = port;
568     ha.u.evtchn.type = type;
569     ha.u.evtchn.flags = 0;
570     ha.u.evtchn.deliver.eventfd.port = 0;
571     ha.u.evtchn.deliver.eventfd.fd = fd;
572 
573     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
574 }
575 
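/*
 * A 64-bit guest can use the full EVTCHN_2L_NR_CHANNELS (64 words of 64 bits,
 * i.e. 4096 ports); a 32-bit guest is limited to 1024.
 */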
576 static bool valid_port(evtchn_port_t port)
577 {
578     if (!port) {
579         return false;
580     }
581 
582     if (xen_is_long_mode()) {
583         return port < EVTCHN_2L_NR_CHANNELS;
584     } else {
585         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
586     }
587 }
588 
589 static bool valid_vcpu(uint32_t vcpu)
590 {
591     return !!qemu_get_cpu(vcpu);
592 }
593 
594 static void unbind_backend_ports(XenEvtchnState *s)
595 {
596     XenEvtchnPort *p;
597     int i;
598 
599     for (i = 1; i < s->nr_ports; i++) {
600         p = &s->port_table[i];
601         if (p->type == EVTCHNSTAT_interdomain &&
602             (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
603             evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
604 
605             if (s->be_handles[be_port]) {
606                 /* This part will be overwritten on the load anyway. */
607                 p->type = EVTCHNSTAT_unbound;
608                 p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
609 
610                 /* Leave the backend port open and unbound too. */
611                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
612                     deassign_kernel_port(i);
613                 }
614                 s->be_handles[be_port]->guest_port = 0;
615             }
616         }
617     }
618 }
619 
620 int xen_evtchn_status_op(struct evtchn_status *status)
621 {
622     XenEvtchnState *s = xen_evtchn_singleton;
623     XenEvtchnPort *p;
624 
625     if (!s) {
626         return -ENOTSUP;
627     }
628 
629     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
630         return -ESRCH;
631     }
632 
633     if (!valid_port(status->port)) {
634         return -EINVAL;
635     }
636 
637     qemu_mutex_lock(&s->port_lock);
638 
639     p = &s->port_table[status->port];
640 
641     status->status = p->type;
642     status->vcpu = p->vcpu;
643 
644     switch (p->type) {
645     case EVTCHNSTAT_unbound:
646         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
647             status->u.unbound.dom = DOMID_QEMU;
648         } else {
649             status->u.unbound.dom = xen_domid;
650         }
651         break;
652 
653     case EVTCHNSTAT_interdomain:
654         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
655             status->u.interdomain.dom = DOMID_QEMU;
656         } else {
657             status->u.interdomain.dom = xen_domid;
658         }
659 
660         status->u.interdomain.port = p->type_val &
661             PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
662         break;
663 
664     case EVTCHNSTAT_pirq:
665         status->u.pirq = p->type_val;
666         break;
667 
668     case EVTCHNSTAT_virq:
669         status->u.virq = p->type_val;
670         break;
671     }
672 
673     qemu_mutex_unlock(&s->port_lock);
674     return 0;
675 }
676 
677 /*
678  * Never thought I'd hear myself say this, but C++ templates would be
679  * kind of nice here.
680  *
681  * template<class T> static int do_unmask_port(T *shinfo, ...);
682  */
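
/*
 * Both the 64-bit (_lm) and 32-bit (_compat) variants below walk the same
 * chain: the per-port pending and mask bits in the shared info page, then
 * the per-vCPU evtchn_pending_sel word, then evtchn_upcall_pending. With
 * 64-bit words, port 1234 maps to evtchn_pending[19] bit 18 (1234 / 64 == 19,
 * 1234 % 64 == 18) and then to bit 19 of evtchn_pending_sel.
 */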
683 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
684                              bool do_unmask, struct shared_info *shinfo,
685                              struct vcpu_info *vcpu_info)
686 {
687     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
688     typeof(shinfo->evtchn_pending[0]) mask;
689     int idx = port / bits_per_word;
690     int offset = port % bits_per_word;
691 
692     mask = 1UL << offset;
693 
694     if (idx >= bits_per_word) {
695         return -EINVAL;
696     }
697 
698     if (do_unmask) {
699         /*
700          * If this is a true unmask operation, clear the mask bit. If
701          * it was already unmasked, we have nothing further to do.
702          */
703         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
704             return 0;
705         }
706     } else {
707         /*
708          * This is a pseudo-unmask for affinity changes. We don't
709          * change the mask bit, and if it's *masked* we have nothing
710          * else to do.
711          */
712         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
713             return 0;
714         }
715     }
716 
717     /* If the event was not pending, we're done. */
718     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
719         return 0;
720     }
721 
722     /* Now on to the vcpu_info evtchn_pending_sel index... */
723     mask = 1UL << idx;
724 
725     /* If a port in this word was already pending for this vCPU, all done. */
726     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
727         return 0;
728     }
729 
730     /* Set evtchn_upcall_pending for this vCPU */
731     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
732         return 0;
733     }
734 
735     inject_callback(s, s->port_table[port].vcpu);
736 
737     return 0;
738 }
739 
740 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
741                                  bool do_unmask,
742                                  struct compat_shared_info *shinfo,
743                                  struct compat_vcpu_info *vcpu_info)
744 {
745     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
746     typeof(shinfo->evtchn_pending[0]) mask;
747     int idx = port / bits_per_word;
748     int offset = port % bits_per_word;
749 
750     mask = 1UL << offset;
751 
752     if (idx >= bits_per_word) {
753         return -EINVAL;
754     }
755 
756     if (do_unmask) {
757         /*
758          * If this is a true unmask operation, clear the mask bit. If
759          * it was already unmasked, we have nothing further to do.
760          */
761         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
762             return 0;
763         }
764     } else {
765         /*
766          * This is a pseudo-unmask for affinity changes. We don't
767          * change the mask bit, and if it's *masked* we have nothing
768          * else to do.
769          */
770         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
771             return 0;
772         }
773     }
774 
775     /* If the event was not pending, we're done. */
776     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
777         return 0;
778     }
779 
780     /* Now on to the vcpu_info evtchn_pending_sel index... */
781     mask = 1UL << idx;
782 
783     /* If a port in this word was already pending for this vCPU, all done. */
784     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
785         return 0;
786     }
787 
788     /* Set evtchn_upcall_pending for this vCPU */
789     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
790         return 0;
791     }
792 
793     inject_callback(s, s->port_table[port].vcpu);
794 
795     return 0;
796 }
797 
798 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
799 {
800     void *vcpu_info, *shinfo;
801 
802     if (s->port_table[port].type == EVTCHNSTAT_closed) {
803         return -EINVAL;
804     }
805 
806     shinfo = xen_overlay_get_shinfo_ptr();
807     if (!shinfo) {
808         return -ENOTSUP;
809     }
810 
811     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
812     if (!vcpu_info) {
813         return -EINVAL;
814     }
815 
816     if (xen_is_long_mode()) {
817         return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
818     } else {
819         return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
820     }
821 }
822 
823 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
824                           struct shared_info *shinfo,
825                           struct vcpu_info *vcpu_info)
826 {
827     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
828     typeof(shinfo->evtchn_pending[0]) mask;
829     int idx = port / bits_per_word;
830     int offset = port % bits_per_word;
831 
832     mask = 1UL << offset;
833 
834     if (idx >= bits_per_word) {
835         return -EINVAL;
836     }
837 
838     /* Update the pending bit itself. If it was already set, we're done. */
839     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
840         return 0;
841     }
842 
843     /* Check if it's masked. */
844     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
845         return 0;
846     }
847 
848     /* Now on to the vcpu_info evtchn_pending_sel index... */
849     mask = 1UL << idx;
850 
851     /* If a port in this word was already pending for this vCPU, all done. */
852     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
853         return 0;
854     }
855 
856     /* Set evtchn_upcall_pending for this vCPU */
857     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
858         return 0;
859     }
860 
861     inject_callback(s, s->port_table[port].vcpu);
862 
863     return 0;
864 }
865 
866 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
867                               struct compat_shared_info *shinfo,
868                               struct compat_vcpu_info *vcpu_info)
869 {
870     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
871     typeof(shinfo->evtchn_pending[0]) mask;
872     int idx = port / bits_per_word;
873     int offset = port % bits_per_word;
874 
875     mask = 1UL << offset;
876 
877     if (idx >= bits_per_word) {
878         return -EINVAL;
879     }
880 
881     /* Update the pending bit itself. If it was already set, we're done. */
882     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
883         return 0;
884     }
885 
886     /* Check if it's masked. */
887     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
888         return 0;
889     }
890 
891     /* Now on to the vcpu_info evtchn_pending_sel index... */
892     mask = 1UL << idx;
893 
894     /* If a port in this word was already pending for this vCPU, all done. */
895     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
896         return 0;
897     }
898 
899     /* Set evtchn_upcall_pending for this vCPU */
900     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
901         return 0;
902     }
903 
904     inject_callback(s, s->port_table[port].vcpu);
905 
906     return 0;
907 }
908 
909 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
910 {
911     void *vcpu_info, *shinfo;
912 
913     if (s->port_table[port].type == EVTCHNSTAT_closed) {
914         return -EINVAL;
915     }
916 
917     if (s->evtchn_in_kernel) {
918         XenEvtchnPort *p = &s->port_table[port];
919         CPUState *cpu = qemu_get_cpu(p->vcpu);
920         struct kvm_irq_routing_xen_evtchn evt;
921 
922         if (!cpu) {
923             return 0;
924         }
925 
926         evt.port = port;
927         evt.vcpu = kvm_arch_vcpu_id(cpu);
928         evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
929 
930         return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
931     }
932 
933     shinfo = xen_overlay_get_shinfo_ptr();
934     if (!shinfo) {
935         return -ENOTSUP;
936     }
937 
938     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
939     if (!vcpu_info) {
940         return -EINVAL;
941     }
942 
943     if (xen_is_long_mode()) {
944         return do_set_port_lm(s, port, shinfo, vcpu_info);
945     } else {
946         return do_set_port_compat(s, port, shinfo, vcpu_info);
947     }
948 }
949 
950 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
951 {
952     void *p = xen_overlay_get_shinfo_ptr();
953 
954     if (!p) {
955         return -ENOTSUP;
956     }
957 
958     if (xen_is_long_mode()) {
959         struct shared_info *shinfo = p;
960         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
961         typeof(shinfo->evtchn_pending[0]) mask;
962         int idx = port / bits_per_word;
963         int offset = port % bits_per_word;
964 
965         mask = 1UL << offset;
966 
967         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
968     } else {
969         struct compat_shared_info *shinfo = p;
970         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
971         typeof(shinfo->evtchn_pending[0]) mask;
972         int idx = port / bits_per_word;
973         int offset = port % bits_per_word;
974 
975         mask = 1UL << offset;
976 
977         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
978     }
979     return 0;
980 }
981 
982 static void free_port(XenEvtchnState *s, evtchn_port_t port)
983 {
984     s->port_table[port].type = EVTCHNSTAT_closed;
985     s->port_table[port].type_val = 0;
986     s->port_table[port].vcpu = 0;
987 
988     if (s->nr_ports == port + 1) {
989         do {
990             s->nr_ports--;
991         } while (s->nr_ports &&
992                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
993     }
994 
995     /* Clear pending event to avoid unexpected behavior on re-bind. */
996     clear_port_pending(s, port);
997 }
998 
999 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1000                          uint16_t val, evtchn_port_t *port)
1001 {
1002     evtchn_port_t p = 1;
1003 
1004     for (p = 1; valid_port(p); p++) {
1005         if (s->port_table[p].type == EVTCHNSTAT_closed) {
1006             s->port_table[p].vcpu = vcpu;
1007             s->port_table[p].type = type;
1008             s->port_table[p].type_val = val;
1009 
1010             *port = p;
1011 
1012             if (s->nr_ports < p + 1) {
1013                 s->nr_ports = p + 1;
1014             }
1015 
1016             return 0;
1017         }
1018     }
1019     return -ENOSPC;
1020 }
1021 
1022 static bool virq_is_global(uint32_t virq)
1023 {
1024     switch (virq) {
1025     case VIRQ_TIMER:
1026     case VIRQ_DEBUG:
1027     case VIRQ_XENOPROF:
1028     case VIRQ_XENPMU:
1029         return false;
1030 
1031     default:
1032         return true;
1033     }
1034 }
1035 
1036 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1037                       bool *flush_kvm_routes)
1038 {
1039     XenEvtchnPort *p = &s->port_table[port];
1040 
1041     /* Because it *might* be a PIRQ port */
1042     assert(qemu_mutex_iothread_locked());
1043 
1044     switch (p->type) {
1045     case EVTCHNSTAT_closed:
1046         return -ENOENT;
1047 
1048     case EVTCHNSTAT_pirq:
1049         s->pirq[p->type_val].port = 0;
1050         if (s->pirq[p->type_val].is_translated) {
1051             *flush_kvm_routes = true;
1052         }
1053         break;
1054 
1055     case EVTCHNSTAT_virq:
1056         kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
1057                               p->type_val, 0);
1058         break;
1059 
1060     case EVTCHNSTAT_ipi:
1061         if (s->evtchn_in_kernel) {
1062             deassign_kernel_port(port);
1063         }
1064         break;
1065 
1066     case EVTCHNSTAT_interdomain:
1067         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1068             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1069             struct xenevtchn_handle *xc = s->be_handles[be_port];
1070             if (xc) {
1071                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1072                     deassign_kernel_port(port);
1073                 }
1074                 xc->guest_port = 0;
1075             }
1076         } else {
1077             /* Loopback interdomain */
1078             XenEvtchnPort *rp = &s->port_table[p->type_val];
1079             if (!valid_port(p->type_val) || rp->type_val != port ||
1080                 rp->type != EVTCHNSTAT_interdomain) {
1081                 error_report("Inconsistent state for interdomain unbind");
1082             } else {
1083                 /* Set the other end back to unbound */
1084                 rp->type = EVTCHNSTAT_unbound;
1085                 rp->type_val = 0;
1086             }
1087         }
1088         break;
1089 
1090     default:
1091         break;
1092     }
1093 
1094     free_port(s, port);
1095     return 0;
1096 }
1097 
1098 int xen_evtchn_soft_reset(void)
1099 {
1100     XenEvtchnState *s = xen_evtchn_singleton;
1101     bool flush_kvm_routes = false;
1102     int i;
1103 
1104     if (!s) {
1105         return -ENOTSUP;
1106     }
1107 
1108     assert(qemu_mutex_iothread_locked());
1109 
1110     qemu_mutex_lock(&s->port_lock);
1111 
1112     for (i = 0; i < s->nr_ports; i++) {
1113         close_port(s, i, &flush_kvm_routes);
1114     }
1115 
1116     qemu_mutex_unlock(&s->port_lock);
1117 
1118     if (flush_kvm_routes) {
1119         kvm_update_msi_routes_all(NULL, true, 0, 0);
1120     }
1121 
1122     return 0;
1123 }
1124 
1125 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1126 {
1127     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1128         return -ESRCH;
1129     }
1130 
1131     return xen_evtchn_soft_reset();
1132 }
1133 
1134 int xen_evtchn_close_op(struct evtchn_close *close)
1135 {
1136     XenEvtchnState *s = xen_evtchn_singleton;
1137     bool flush_kvm_routes = false;
1138     int ret;
1139 
1140     if (!s) {
1141         return -ENOTSUP;
1142     }
1143 
1144     if (!valid_port(close->port)) {
1145         return -EINVAL;
1146     }
1147 
1148     QEMU_IOTHREAD_LOCK_GUARD();
1149     qemu_mutex_lock(&s->port_lock);
1150 
1151     ret = close_port(s, close->port, &flush_kvm_routes);
1152 
1153     qemu_mutex_unlock(&s->port_lock);
1154 
1155     if (flush_kvm_routes) {
1156         kvm_update_msi_routes_all(NULL, true, 0, 0);
1157     }
1158 
1159     return ret;
1160 }
1161 
1162 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1163 {
1164     XenEvtchnState *s = xen_evtchn_singleton;
1165     int ret;
1166 
1167     if (!s) {
1168         return -ENOTSUP;
1169     }
1170 
1171     if (!valid_port(unmask->port)) {
1172         return -EINVAL;
1173     }
1174 
1175     qemu_mutex_lock(&s->port_lock);
1176 
1177     ret = unmask_port(s, unmask->port, true);
1178 
1179     qemu_mutex_unlock(&s->port_lock);
1180 
1181     return ret;
1182 }
1183 
1184 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1185 {
1186     XenEvtchnState *s = xen_evtchn_singleton;
1187     XenEvtchnPort *p;
1188     int ret = -EINVAL;
1189 
1190     if (!s) {
1191         return -ENOTSUP;
1192     }
1193 
1194     if (!valid_port(vcpu->port)) {
1195         return -EINVAL;
1196     }
1197 
1198     if (!valid_vcpu(vcpu->vcpu)) {
1199         return -ENOENT;
1200     }
1201 
1202     qemu_mutex_lock(&s->port_lock);
1203 
1204     p = &s->port_table[vcpu->port];
1205 
1206     if (p->type == EVTCHNSTAT_interdomain ||
1207         p->type == EVTCHNSTAT_unbound ||
1208         p->type == EVTCHNSTAT_pirq ||
1209         (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
1210         /*
1211          * unmask_port() with do_unmask==false will just raise the event
1212          * on the new vCPU if the port was already pending.
1213          */
1214         p->vcpu = vcpu->vcpu;
1215         unmask_port(s, vcpu->port, false);
1216         ret = 0;
1217     }
1218 
1219     qemu_mutex_unlock(&s->port_lock);
1220 
1221     return ret;
1222 }
1223 
1224 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1225 {
1226     XenEvtchnState *s = xen_evtchn_singleton;
1227     int ret;
1228 
1229     if (!s) {
1230         return -ENOTSUP;
1231     }
1232 
1233     if (virq->virq >= NR_VIRQS) {
1234         return -EINVAL;
1235     }
1236 
1237     /* Global VIRQ must be allocated on vCPU0 first */
1238     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1239         return -EINVAL;
1240     }
1241 
1242     if (!valid_vcpu(virq->vcpu)) {
1243         return -ENOENT;
1244     }
1245 
1246     qemu_mutex_lock(&s->port_lock);
1247 
1248     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1249                         &virq->port);
1250     if (!ret) {
1251         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1252         if (ret) {
1253             free_port(s, virq->port);
1254         }
1255     }
1256 
1257     qemu_mutex_unlock(&s->port_lock);
1258 
1259     return ret;
1260 }
1261 
1262 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1263 {
1264     XenEvtchnState *s = xen_evtchn_singleton;
1265     int ret;
1266 
1267     if (!s) {
1268         return -ENOTSUP;
1269     }
1270 
1271     if (pirq->pirq >= s->nr_pirqs) {
1272         return -EINVAL;
1273     }
1274 
1275     QEMU_IOTHREAD_LOCK_GUARD();
1276 
1277     if (s->pirq[pirq->pirq].port) {
1278         return -EBUSY;
1279     }
1280 
1281     qemu_mutex_lock(&s->port_lock);
1282 
1283     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1284                         &pirq->port);
1285     if (ret) {
1286         qemu_mutex_unlock(&s->port_lock);
1287         return ret;
1288     }
1289 
1290     s->pirq[pirq->pirq].port = pirq->port;
1291     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1292 
1293     qemu_mutex_unlock(&s->port_lock);
1294 
1295     /*
1296      * Need to do the unmask outside port_lock because it may call
1297      * back into the MSI translate function.
1298      */
1299     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1300         if (s->pirq[pirq->pirq].is_masked) {
1301             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1302             int vector = s->pirq[pirq->pirq].vector;
1303             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1304 
1305             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1306             g_free(dev_path);
1307 
1308             if (s->pirq[pirq->pirq].is_msix) {
1309                 msix_set_mask(dev, vector, false);
1310             } else {
1311                 msi_set_mask(dev, vector, false, NULL);
1312             }
1313         } else if (s->pirq[pirq->pirq].is_translated) {
1314             /*
1315              * If KVM had attempted to translate this one before, make it try
1316              * again. If we unmasked, then the notifier on the MSI(-X) vector
1317              * will already have had the same effect.
1318              */
1319             kvm_update_msi_routes_all(NULL, true, 0, 0);
1320         }
1321     }
1322 
1323     return ret;
1324 }
1325 
1326 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1327 {
1328     XenEvtchnState *s = xen_evtchn_singleton;
1329     int ret;
1330 
1331     if (!s) {
1332         return -ENOTSUP;
1333     }
1334 
1335     if (!valid_vcpu(ipi->vcpu)) {
1336         return -ENOENT;
1337     }
1338 
1339     qemu_mutex_lock(&s->port_lock);
1340 
1341     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1342     if (!ret && s->evtchn_in_kernel) {
1343         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1344     }
1345 
1346     qemu_mutex_unlock(&s->port_lock);
1347 
1348     return ret;
1349 }
1350 
1351 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1352 {
1353     XenEvtchnState *s = xen_evtchn_singleton;
1354     uint16_t type_val;
1355     int ret;
1356 
1357     if (!s) {
1358         return -ENOTSUP;
1359     }
1360 
1361     if (interdomain->remote_dom == DOMID_QEMU) {
1362         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1363     } else if (interdomain->remote_dom == DOMID_SELF ||
1364                interdomain->remote_dom == xen_domid) {
1365         type_val = 0;
1366     } else {
1367         return -ESRCH;
1368     }
1369 
1370     if (!valid_port(interdomain->remote_port)) {
1371         return -EINVAL;
1372     }
1373 
1374     qemu_mutex_lock(&s->port_lock);
1375 
1376     /* The newly allocated port starts out as unbound */
1377     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
1378                         &interdomain->local_port);
1379     if (ret) {
1380         goto out;
1381     }
1382 
1383     if (interdomain->remote_dom == DOMID_QEMU) {
1384         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1385         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1386 
1387         if (!xc) {
1388             ret = -ENOENT;
1389             goto out_free_port;
1390         }
1391 
1392         if (xc->guest_port) {
1393             ret = -EBUSY;
1394             goto out_free_port;
1395         }
1396 
1397         assert(xc->be_port == interdomain->remote_port);
1398         xc->guest_port = interdomain->local_port;
1399         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1400             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1401         }
1402         lp->type = EVTCHNSTAT_interdomain;
1403         lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
1404         ret = 0;
1405     } else {
1406         /* Loopback */
1407         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1408         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1409 
1410         if (rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {
1411             /* It's a match! */
1412             rp->type = EVTCHNSTAT_interdomain;
1413             rp->type_val = interdomain->local_port;
1414 
1415             lp->type = EVTCHNSTAT_interdomain;
1416             lp->type_val = interdomain->remote_port;
1417         } else {
1418             ret = -EINVAL;
1419         }
1420     }
1421 
1422  out_free_port:
1423     if (ret) {
1424         free_port(s, interdomain->local_port);
1425     }
1426  out:
1427     qemu_mutex_unlock(&s->port_lock);
1428 
1429     return ret;
1430 }
1431 
1432 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1433 {
1434     XenEvtchnState *s = xen_evtchn_singleton;
1435     uint16_t type_val;
1436     int ret;
1437 
1438     if (!s) {
1439         return -ENOTSUP;
1440     }
1441 
1442     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1443         return -ESRCH;
1444     }
1445 
1446     if (alloc->remote_dom == DOMID_QEMU) {
1447         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1448     } else if (alloc->remote_dom == DOMID_SELF ||
1449                alloc->remote_dom == xen_domid) {
1450         type_val = 0;
1451     } else {
1452         return -EPERM;
1453     }
1454 
1455     qemu_mutex_lock(&s->port_lock);
1456 
1457     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
1458 
1459     qemu_mutex_unlock(&s->port_lock);
1460 
1461     return ret;
1462 }
1463 
1464 int xen_evtchn_send_op(struct evtchn_send *send)
1465 {
1466     XenEvtchnState *s = xen_evtchn_singleton;
1467     XenEvtchnPort *p;
1468     int ret = 0;
1469 
1470     if (!s) {
1471         return -ENOTSUP;
1472     }
1473 
1474     if (!valid_port(send->port)) {
1475         return -EINVAL;
1476     }
1477 
1478     qemu_mutex_lock(&s->port_lock);
1479 
1480     p = &s->port_table[send->port];
1481 
1482     switch (p->type) {
1483     case EVTCHNSTAT_interdomain:
1484         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1485             /*
1486              * This is an event from the guest to qemu itself, which is
1487              * serving as the driver domain.
1488              */
1489             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1490             struct xenevtchn_handle *xc = s->be_handles[be_port];
1491             if (xc) {
1492                 eventfd_write(xc->fd, 1);
1493                 ret = 0;
1494             } else {
1495                 ret = -ENOENT;
1496             }
1497         } else {
1498             /* Loopback interdomain ports; just a complex IPI */
1499             set_port_pending(s, p->type_val);
1500         }
1501         break;
1502 
1503     case EVTCHNSTAT_ipi:
1504         set_port_pending(s, send->port);
1505         break;
1506 
1507     case EVTCHNSTAT_unbound:
1508         /* Xen will silently drop these */
1509         break;
1510 
1511     default:
1512         ret = -EINVAL;
1513         break;
1514     }
1515 
1516     qemu_mutex_unlock(&s->port_lock);
1517 
1518     return ret;
1519 }
1520 
1521 int xen_evtchn_set_port(uint16_t port)
1522 {
1523     XenEvtchnState *s = xen_evtchn_singleton;
1524     XenEvtchnPort *p;
1525     int ret = -EINVAL;
1526 
1527     if (!s) {
1528         return -ENOTSUP;
1529     }
1530 
1531     if (!valid_port(port)) {
1532         return -EINVAL;
1533     }
1534 
1535     qemu_mutex_lock(&s->port_lock);
1536 
1537     p = &s->port_table[port];
1538 
1539     /* QEMU has no business sending to anything but these */
1540     if (p->type == EVTCHNSTAT_virq ||
1541         (p->type == EVTCHNSTAT_interdomain &&
1542          (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
1543         set_port_pending(s, port);
1544         ret = 0;
1545     }
1546 
1547     qemu_mutex_unlock(&s->port_lock);
1548 
1549     return ret;
1550 }
1551 
1552 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1553 {
1554     uint16_t pirq;
1555 
1556     /*
1557      * Preserve the allocation strategy that Xen has. It looks like
1558      * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1559      * to GSIs (counting up from 16), and then we count backwards from
1560      * the top for MSIs or when the GSI space is exhausted.
1561      */
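    /*
     * e.g. with IOAPIC_NUM_PINS == 24 and the default of 256 PIRQs, GSIs get
     * PIRQs 16-23 while MSIs (and any GSI overflow) count down from PIRQ 255.
     */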
1562     if (type == MAP_PIRQ_TYPE_GSI) {
1563         for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1564             if (pirq_inuse(s, pirq)) {
1565                 continue;
1566             }
1567 
1568             /* Found it */
1569             goto found;
1570         }
1571     }
1572     for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1573         /* Skip whole words at a time when they're full */
1574         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1575             pirq &= ~63ULL;
1576             continue;
1577         }
1578         if (pirq_inuse(s, pirq)) {
1579             continue;
1580         }
1581 
1582         goto found;
1583     }
1584     return -ENOSPC;
1585 
1586  found:
1587     pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1588     if (gsi >= 0) {
1589         assert(gsi < IOAPIC_NUM_PINS);
1590         s->gsi_pirq[gsi] = pirq;
1591     }
1592     s->pirq[pirq].gsi = gsi;
1593     return pirq;
1594 }
1595 
1596 bool xen_evtchn_set_gsi(int gsi, int level)
1597 {
1598     XenEvtchnState *s = xen_evtchn_singleton;
1599     int pirq;
1600 
1601     assert(qemu_mutex_iothread_locked());
1602 
1603     if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1604         return false;
1605     }
1606 
1607     /*
1608      * Check that it *isn't* the event channel GSI, and thus
1609      * that we are not recursing and it's safe to take s->port_lock.
1610      *
1611      * Locking aside, it's perfectly sane to bail out early for that
1612      * special case, as it would make no sense for the event channel
1613      * GSI to be routed back to event channels, when the delivery
1614      * method is to raise the GSI... that recursion wouldn't *just*
1615      * be a locking issue.
1616      */
1617     if (gsi && gsi == s->callback_gsi) {
1618         return false;
1619     }
1620 
1621     QEMU_LOCK_GUARD(&s->port_lock);
1622 
1623     pirq = s->gsi_pirq[gsi];
1624     if (!pirq) {
1625         return false;
1626     }
1627 
1628     if (level) {
1629         int port = s->pirq[pirq].port;
1630 
1631         s->pirq_gsi_set |= (1U << gsi);
1632         if (port) {
1633             set_port_pending(s, port);
1634         }
1635     } else {
1636         s->pirq_gsi_set &= ~(1U << gsi);
1637     }
1638     return true;
1639 }
1640 
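/*
 * Example: a 32-bit MSI address of 0xfee41000 with a zero vector in the data
 * encodes PIRQ 0x41 (address bits 12-19); PIRQ numbers above 255 need the
 * high 32 bits of the address, which is why this scheme can't work with
 * 32-bit MSI.
 */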
1641 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1642 {
1643     /* The vector (in low 8 bits of data) must be zero */
1644     if (data & 0xff) {
1645         return 0;
1646     }
1647 
1648     uint32_t pirq = (addr & 0xff000) >> 12;
1649     pirq |= (addr >> 32) & 0xffffff00;
1650 
1651     return pirq;
1652 }
1653 
1654 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1655                                  int except_pirq)
1656 {
1657     uint32_t pirq;
1658 
1659     for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1660         /*
1661          * We could be cleverer here, but it isn't really a fast path, and
1662          * this trivial optimisation is enough to let us skip the big gap
1663          * in the middle a bit quicker (in terms of both loop iterations,
1664          * and cache lines).
1665          */
1666         if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1667             pirq += 63; /* the loop increment brings us to the next word */
1668             continue;
1669         }
1670         if (except_pirq && pirq == except_pirq) {
1671             continue;
1672         }
1673         if (s->pirq[pirq].dev != dev) {
1674             continue;
1675         }
1676         if (vector != -1 && s->pirq[pirq].vector != vector) {
1677             continue;
1678         }
1679 
1680         /* It could theoretically be bound to a port already, but that is OK. */
1681         s->pirq[pirq].dev = dev;
1682         s->pirq[pirq].gsi = IRQ_UNBOUND;
1683         s->pirq[pirq].is_msix = false;
1684         s->pirq[pirq].vector = 0;
1685         s->pirq[pirq].is_masked = false;
1686         s->pirq[pirq].is_translated = false;
1687     }
1688 }
1689 
1690 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1691 {
1692     XenEvtchnState *s = xen_evtchn_singleton;
1693 
1694     if (!s) {
1695         return;
1696     }
1697 
1698     QEMU_LOCK_GUARD(&s->port_lock);
1699     do_remove_pci_vector(s, dev, -1, 0);
1700 }
1701 
1702 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1703                           uint64_t addr, uint32_t data, bool is_masked)
1704 {
1705     XenEvtchnState *s = xen_evtchn_singleton;
1706     uint32_t pirq;
1707 
1708     if (!s) {
1709         return;
1710     }
1711 
1712     assert(qemu_mutex_iothread_locked());
1713 
1714     pirq = msi_pirq_target(addr, data);
1715 
1716     /*
1717      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1718      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1719      */
1720     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1721         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1722          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1723         pirq = 0;
1724     }
1725 
1726     if (pirq) {
1727         s->pirq[pirq].dev = dev;
1728         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1729         s->pirq[pirq].is_msix = is_msix;
1730         s->pirq[pirq].vector = vector;
1731         s->pirq[pirq].is_masked = is_masked;
1732     }
1733 
1734     /* Remove any (other) entries for this {device, vector} */
1735     do_remove_pci_vector(s, dev, vector, pirq);
1736 }
1737 
1738 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1739                                   uint64_t address, uint32_t data)
1740 {
1741     XenEvtchnState *s = xen_evtchn_singleton;
1742     uint32_t pirq, port;
1743     CPUState *cpu;
1744 
1745     if (!s) {
1746         return 1; /* Not a PIRQ */
1747     }
1748 
1749     assert(qemu_mutex_iothread_locked());
1750 
1751     pirq = msi_pirq_target(address, data);
1752     if (!pirq || pirq >= s->nr_pirqs) {
1753         return 1; /* Not a PIRQ */
1754     }
1755 
1756     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1757         return -ENOTSUP;
1758     }
1759 
1760     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1761         return -EINVAL;
1762     }
1763 
1764     /* Remember that KVM tried to translate this. It might need to try again. */
1765     s->pirq[pirq].is_translated = true;
1766 
1767     QEMU_LOCK_GUARD(&s->port_lock);
1768 
1769     port = s->pirq[pirq].port;
1770     if (!valid_port(port)) {
1771         return -EINVAL;
1772     }
1773 
1774     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1775     if (!cpu) {
1776         return -EINVAL;
1777     }
1778 
1779     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1780     route->u.xen_evtchn.port = port;
1781     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1782     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1783 
1784     return 0; /* Handled */
1785 }
1786 
1787 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1788 {
1789     XenEvtchnState *s = xen_evtchn_singleton;
1790     uint32_t pirq, port;
1791 
1792     if (!s) {
1793         return false;
1794     }
1795 
1796     assert(qemu_mutex_iothread_locked());
1797 
1798     pirq = msi_pirq_target(address, data);
1799     if (!pirq || pirq >= s->nr_pirqs) {
1800         return false;
1801     }
1802 
1803     QEMU_LOCK_GUARD(&s->port_lock);
1804 
1805     port = s->pirq[pirq].port;
1806     if (!valid_port(port)) {
1807         return false;
1808     }
1809 
1810     set_port_pending(s, port);
1811     return true;
1812 }
1813 
1814 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1815 {
1816     XenEvtchnState *s = xen_evtchn_singleton;
1817     int pirq = map->pirq;
1818     int gsi = map->index;
1819 
1820     if (!s) {
1821         return -ENOTSUP;
1822     }
1823 
1824     QEMU_IOTHREAD_LOCK_GUARD();
1825     QEMU_LOCK_GUARD(&s->port_lock);
1826 
1827     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1828         return -EPERM;
1829     }
1830     if (map->type != MAP_PIRQ_TYPE_GSI) {
1831         return -EINVAL;
1832     }
1833     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1834         return -EINVAL;
1835     }
1836 
1837     if (pirq < 0) {
1838         pirq = allocate_pirq(s, map->type, gsi);
1839         if (pirq < 0) {
1840             return pirq;
1841         }
1842         map->pirq = pirq;
1843     } else if (pirq >= s->nr_pirqs) {
1844         return -EINVAL;
1845     } else {
1846         /*
1847          * User specified a valid-looking PIRQ#. Allow it if it is
1848          * allocated and not yet bound, or if it is unallocated
1849          */
1850         if (pirq_inuse(s, pirq)) {
1851             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1852                 return -EBUSY;
1853             }
1854         } else {
1855             /* If it was unused, mark it used now. */
1856             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1857         }
1858         /* Set the mapping in both directions. */
1859         s->pirq[pirq].gsi = gsi;
1860         s->gsi_pirq[gsi] = pirq;
1861     }
1862 
1863     trace_kvm_xen_map_pirq(pirq, gsi);
1864     return 0;
1865 }
1866 
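     /*
      * PHYSDEVOP_unmap_pirq: tear down a GSI-to-PIRQ mapping and release
      * the PIRQ.
      */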
1867 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1868 {
1869     XenEvtchnState *s = xen_evtchn_singleton;
1870     int pirq = unmap->pirq;
1871     int gsi;
1872 
1873     if (!s) {
1874         return -ENOTSUP;
1875     }
1876 
1877     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1878         return -EPERM;
1879     }
1880     if (pirq < 0 || pirq >= s->nr_pirqs) {
1881         return -EINVAL;
1882     }
1883 
1884     QEMU_IOTHREAD_LOCK_GUARD();
1885     qemu_mutex_lock(&s->port_lock);
1886 
1887     if (!pirq_inuse(s, pirq)) {
1888         qemu_mutex_unlock(&s->port_lock);
1889         return -ENOENT;
1890     }
1891 
1892     gsi = s->pirq[pirq].gsi;
1893 
1894     /* We can only unmap GSI PIRQs */
1895     if (gsi < 0) {
1896         qemu_mutex_unlock(&s->port_lock);
1897         return -EINVAL;
1898     }
1899 
1900     s->gsi_pirq[gsi] = 0;
1901     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1902     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1903 
1904     trace_kvm_xen_unmap_pirq(pirq, gsi);
1905     qemu_mutex_unlock(&s->port_lock);
1906 
1907     if (gsi == IRQ_MSI_EMU) {
1908         kvm_update_msi_routes_all(NULL, true, 0, 0);
1909     }
1910 
1911     return 0;
1912 }
1913 
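     /*
      * PHYSDEVOP_eoi: end-of-interrupt for a PIRQ. If the underlying
      * level-triggered GSI is still asserted, re-raise the bound event
      * channel.
      */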
1914 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1915 {
1916     XenEvtchnState *s = xen_evtchn_singleton;
1917     int pirq = eoi->irq;
1918     int gsi;
1919 
1920     if (!s) {
1921         return -ENOTSUP;
1922     }
1923 
1924     QEMU_IOTHREAD_LOCK_GUARD();
1925     QEMU_LOCK_GUARD(&s->port_lock);
1926 
1927     if (!pirq_inuse(s, pirq)) {
1928         return -ENOENT;
1929     }
1930 
1931     gsi = s->pirq[pirq].gsi;
1932     if (gsi < 0) {
1933         return -EINVAL;
1934     }
1935 
1936     /* Reassert a level IRQ if needed */
1937     if (s->pirq_gsi_set & (1U << gsi)) {
1938         int port = s->pirq[pirq].port;
1939         if (port) {
1940             set_port_pending(s, port);
1941         }
1942     }
1943 
1944     return 0;
1945 }
1946 
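     /*
      * PHYSDEVOP_irq_status_query: GSI-mapped PIRQs need an explicit EOI
      * from the guest; others do not.
      */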
1947 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1948 {
1949     XenEvtchnState *s = xen_evtchn_singleton;
1950     int pirq = query->irq;
1951 
1952     if (!s) {
1953         return -ENOTSUP;
1954     }
1955 
1956     QEMU_IOTHREAD_LOCK_GUARD();
1957     QEMU_LOCK_GUARD(&s->port_lock);
1958 
1959     if (!pirq_inuse(s, pirq)) {
1960         return -ENOENT;
1961     }
1962 
1963     if (s->pirq[pirq].gsi >= 0) {
1964         query->flags = XENIRQSTAT_needs_eoi;
1965     } else {
1966         query->flags = 0;
1967     }
1968 
1969     return 0;
1970 }
1971 
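     /* PHYSDEVOP_get_free_pirq: allocate an unbound PIRQ for the guest. */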
1972 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1973 {
1974     XenEvtchnState *s = xen_evtchn_singleton;
1975     int pirq;
1976 
1977     if (!s) {
1978         return -ENOTSUP;
1979     }
1980 
1981     QEMU_LOCK_GUARD(&s->port_lock);
1982 
1983     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1984     if (pirq < 0) {
1985         return pirq;
1986     }
1987 
1988     get->pirq = pirq;
1989     trace_kvm_xen_get_free_pirq(pirq, get->type);
1990     return 0;
1991 }
1992 
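     /*
      * Backend (QEMU-side) equivalent of xenevtchn_open(): allocate a
      * handle whose notifications are signalled through an eventfd.
      */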
1993 struct xenevtchn_handle *xen_be_evtchn_open(void)
1994 {
1995     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
1996 
1997     xc->fd = eventfd(0, EFD_CLOEXEC);
1998     if (xc->fd < 0) {
1999         g_free(xc);
2000         return NULL;
2001     }
2002 
2003     return xc;
2004 }
2005 
2006 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2007 {
2008     int i;
2009 
2010     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2011         if (!s->be_handles[i]) {
2012             s->be_handles[i] = xc;
2013             xc->be_port = i;
2014             return i;
2015         }
2016     }
2017     return 0;
2018 }
2019 
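     /*
      * Bind a backend handle to an unbound guest port, or re-attach to an
      * existing interdomain port after migration. Returns the backend port
      * number on success, or -errno.
      */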
2020 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2021                                    evtchn_port_t guest_port)
2022 {
2023     XenEvtchnState *s = xen_evtchn_singleton;
2024     XenEvtchnPort *gp;
2025     uint16_t be_port = 0;
2026     int ret;
2027 
2028     if (!s) {
2029         return -ENOTSUP;
2030     }
2031 
2032     if (!xc) {
2033         return -EFAULT;
2034     }
2035 
2036     if (domid != xen_domid) {
2037         return -ESRCH;
2038     }
2039 
2040     if (!valid_port(guest_port)) {
2041         return -EINVAL;
2042     }
2043 
2044     qemu_mutex_lock(&s->port_lock);
2045 
2046     /* The guest has to have an unbound port waiting for us to bind */
2047     gp = &s->port_table[guest_port];
2048 
2049     switch (gp->type) {
2050     case EVTCHNSTAT_interdomain:
2051         /* Allow rebinding after migration, preserve port # if possible */
2052         be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
2053         assert(be_port != 0);
2054         if (!s->be_handles[be_port]) {
2055             s->be_handles[be_port] = xc;
2056             xc->guest_port = guest_port;
2057             ret = xc->be_port = be_port;
2058             if (kvm_xen_has_cap(EVTCHN_SEND)) {
2059                 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2060             }
2061             break;
2062         }
2063         /* fall through */
2064 
2065     case EVTCHNSTAT_unbound:
2066         be_port = find_be_port(s, xc);
2067         if (!be_port) {
2068             ret = -ENOSPC;
2069             goto out;
2070         }
2071 
2072         gp->type = EVTCHNSTAT_interdomain;
2073         gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
2074         xc->guest_port = guest_port;
2075         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2076             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2077         }
2078         ret = be_port;
2079         break;
2080 
2081     default:
2082         ret = -EINVAL;
2083         break;
2084     }
2085 
2086  out:
2087     qemu_mutex_unlock(&s->port_lock);
2088 
2089     return ret;
2090 }
2091 
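     /* Detach a backend handle, returning its guest port to the unbound state. */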
2092 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2093 {
2094     XenEvtchnState *s = xen_evtchn_singleton;
2095     int ret;
2096 
2097     if (!s) {
2098         return -ENOTSUP;
2099     }
2100 
2101     if (!xc) {
2102         return -EFAULT;
2103     }
2104 
2105     qemu_mutex_lock(&s->port_lock);
2106 
2107     if (port && port != xc->be_port) {
2108         ret = -EINVAL;
2109         goto out;
2110     }
2111 
2112     if (xc->guest_port) {
2113         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2114 
2115         /* This should never *not* be true */
2116         if (gp->type == EVTCHNSTAT_interdomain) {
2117             gp->type = EVTCHNSTAT_unbound;
2118             gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
2119         }
2120 
2121         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2122             deassign_kernel_port(xc->guest_port);
2123         }
2124         xc->guest_port = 0;
2125     }
2126 
2127     s->be_handles[xc->be_port] = NULL;
2128     xc->be_port = 0;
2129     ret = 0;
2130  out:
2131     qemu_mutex_unlock(&s->port_lock);
2132     return ret;
2133 }
2134 
2135 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2136 {
2137     if (!xc) {
2138         return -EFAULT;
2139     }
2140 
2141     xen_be_evtchn_unbind(xc, 0);
2142 
2143     close(xc->fd);
2144     g_free(xc);
2145     return 0;
2146 }
2147 
2148 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2149 {
2150     if (!xc) {
2151         return -1;
2152     }
2153     return xc->fd;
2154 }
2155 
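     /* Raise the guest event channel bound to this backend handle. */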
2156 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2157 {
2158     XenEvtchnState *s = xen_evtchn_singleton;
2159     int ret;
2160 
2161     if (!s) {
2162         return -ENOTSUP;
2163     }
2164 
2165     if (!xc) {
2166         return -EFAULT;
2167     }
2168 
2169     qemu_mutex_lock(&s->port_lock);
2170 
2171     if (xc->guest_port) {
2172         set_port_pending(s, xc->guest_port);
2173         ret = 0;
2174     } else {
2175         ret = -ENOTCONN;
2176     }
2177 
2178     qemu_mutex_unlock(&s->port_lock);
2179 
2180     return ret;
2181 }
2182 
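     /*
      * Consume a pending notification from the backend eventfd. Returns the
      * backend port number if an event was pending, 0 if not, or -errno.
      */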
2183 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2184 {
2185     uint64_t val;
2186 
2187     if (!xc) {
2188         return -EFAULT;
2189     }
2190 
2191     if (!xc->be_port) {
2192         return 0;
2193     }
2194 
2195     if (eventfd_read(xc->fd, &val)) {
2196         return -errno;
2197     }
2198 
2199     return val ? xc->be_port : 0;
2200 }
2201 
2202 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2203 {
2204     if (!xc) {
2205         return -EFAULT;
2206     }
2207 
2208     if (xc->be_port != port) {
2209         return -EINVAL;
2210     }
2211 
2212     /*
2213      * We don't actually do anything to unmask it; the event was already
2214      * consumed in xen_be_evtchn_pending().
2215      */
2216     return 0;
2217 }
2218 
2219 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2220 {
2221     return xc->guest_port;
2222 }
2223 
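     /*
      * QMP query: enumerate all open event channel ports, reading the
      * pending and masked bits directly from the shared info page.
      */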
2224 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2225 {
2226     XenEvtchnState *s = xen_evtchn_singleton;
2227     EvtchnInfoList *head = NULL, **tail = &head;
2228     void *shinfo, *pending, *mask;
2229     int i;
2230 
2231     if (!s) {
2232         error_setg(errp, "Xen event channel emulation not enabled");
2233         return NULL;
2234     }
2235 
2236     shinfo = xen_overlay_get_shinfo_ptr();
2237     if (!shinfo) {
2238         error_setg(errp, "Xen shared info page not allocated");
2239         return NULL;
2240     }
2241 
2242     if (xen_is_long_mode()) {
2243         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2244         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2245     } else {
2246         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2247         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2248     }
2249 
2250     QEMU_LOCK_GUARD(&s->port_lock);
2251 
2252     for (i = 0; i < s->nr_ports; i++) {
2253         XenEvtchnPort *p = &s->port_table[i];
2254         EvtchnInfo *info;
2255 
2256         if (p->type == EVTCHNSTAT_closed) {
2257             continue;
2258         }
2259 
2260         info = g_new0(EvtchnInfo, 1);
2261 
2262         info->port = i;
2263         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2264         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2265         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2266         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2267         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2268         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2269 
2270         info->type = p->type;
2271         if (p->type == EVTCHNSTAT_interdomain) {
2272             info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
2273                                            "qemu" : "loopback");
2274             info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
2275         } else {
2276             info->target = p->type_val;
2277         }
2278         info->vcpu = p->vcpu;
2279         info->pending = test_bit(i, pending);
2280         info->masked = test_bit(i, mask);
2281 
2282         QAPI_LIST_APPEND(tail, info);
2283     }
2284 
2285     return head;
2286 }
2287 
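     /* QMP command: raise the given event channel port in the guest. */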
2288 void qmp_xen_event_inject(uint32_t port, Error **errp)
2289 {
2290     XenEvtchnState *s = xen_evtchn_singleton;
2291 
2292     if (!s) {
2293         error_setg(errp, "Xen event channel emulation not enabled");
2294         return;
2295     }
2296 
2297     if (!valid_port(port)) {
2298         error_setg(errp, "Invalid port %u", port);
2299     }
2300 
2301     QEMU_LOCK_GUARD(&s->port_lock);
2302 
2303     if (set_port_pending(s, port)) {
2304         error_setg(errp, "Failed to set port %u", port);
2305         return;
2306     }
2307 }
2308 
2309 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2310 {
2311     EvtchnInfoList *iter, *info_list;
2312     Error *err = NULL;
2313 
2314     info_list = qmp_xen_event_list(&err);
2315     if (err) {
2316         hmp_handle_error(mon, err);
2317         return;
2318     }
2319 
2320     for (iter = info_list; iter; iter = iter->next) {
2321         EvtchnInfo *info = iter->value;
2322 
2323         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2324                        EvtchnPortType_str(info->type));
2325         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2326             monitor_printf(mon,  "(");
2327             if (info->remote_domain) {
2328                 monitor_printf(mon, "%s:", info->remote_domain);
2329             }
2330             monitor_printf(mon, "%d)", info->target);
2331         }
2332         if (info->pending) {
2333             monitor_printf(mon, " PENDING");
2334         }
2335         if (info->masked) {
2336             monitor_printf(mon, " MASKED");
2337         }
2338         monitor_printf(mon, "\n");
2339     }
2340 
2341     qapi_free_EvtchnInfoList(info_list);
2342 }
2343 
2344 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2345 {
2346     int port = qdict_get_int(qdict, "port");
2347     Error *err = NULL;
2348 
2349     qmp_xen_event_inject(port, &err);
2350     if (err) {
2351         hmp_handle_error(mon, err);
2352     } else {
2353         monitor_printf(mon, "Delivered port %d\n", port);
2354     }
2355 }
2356 
2357