xref: /openbmc/qemu/hw/i386/kvm/xen_evtchn.c (revision d2dfe0b5)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-target.h"
23 #include "qapi/qmp/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "exec/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29 
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39 
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43 
44 #include "sysemu/kvm.h"
45 #include "sysemu/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48 
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51 
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54 
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57 
/*
 * State of a single event channel port; one entry per port lives in
 * XenEvtchnState.port_table. The meaning of type_val depends on type.
 */
struct XenEvtchnPort {
    uint32_t vcpu;      /* Xen/ACPI vcpu_id the port is bound to */
    uint16_t type;      /* one of the EVTCHNSTAT_xxxx values */
    uint16_t type_val;  /* pirq#, virq# or remote port, depending on type */
};
typedef struct XenEvtchnPort XenEvtchnPort;
63 
/*
 * 32-bit ("compat") layouts of the guest-shared structures. They are
 * used for 32-bit guests, and natively when QEMU is built 32-bit.
 */
struct compat_arch_vcpu_info {
    unsigned int cr2;
    unsigned int pad[5];
};
69 
70 struct compat_vcpu_info {
71     uint8_t evtchn_upcall_pending;
72     uint8_t evtchn_upcall_mask;
73     uint16_t pad;
74     uint32_t evtchn_pending_sel;
75     struct compat_arch_vcpu_info arch;
76     struct vcpu_time_info time;
77 }; /* 64 bytes (x86) */
78 
/* Architecture-specific tail of the 32-bit shared info page. */
struct compat_arch_shared_info {
    unsigned int max_pfn;
    unsigned int pfn_to_mfn_frame_list_list;
    unsigned int nmi_reason;
    unsigned int p2m_cr3;
    unsigned int p2m_vaddr;
    unsigned int p2m_generation;
    uint32_t wc_sec_hi;
};
88 
89 struct compat_shared_info {
90     struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
91     uint32_t evtchn_pending[32];
92     uint32_t evtchn_mask[32];
93     uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
94     uint32_t wc_sec;
95     uint32_t wc_nsec;
96     struct compat_arch_shared_info arch;
97 };
98 
/* Number of 2-level event channel ports available in the 32-bit ABI. */
#define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
100 
101 /* Local private implementation of struct xenevtchn_handle */
102 struct xenevtchn_handle {
103     evtchn_port_t be_port;
104     evtchn_port_t guest_port; /* Or zero for unbound */
105     int fd;
106 };
107 
/*
 * Encoding of XenEvtchnPort.type_val for unbound/interdomain ports.
 *
 * The remote end can only be one of two domains: the guest itself or
 * QEMU. The top bit of type_val records which one, and the remaining
 * low bits hold the remote port number (0 while still unbound).
 */
#define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
#define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
115 
/*
 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
 * insane enough to think about guest-transparent live migration from actual
 * Xen to QEMU, and ensuring that we can convert/consume the stream.
 *
 * The negative values are parenthesised so that the macros expand safely
 * inside any surrounding expression.
 */
#define IRQ_UNBOUND (-1)
#define IRQ_PT (-2)
#define IRQ_MSI_EMU (-3)
124 
125 
126 struct pirq_info {
127     int gsi;
128     uint16_t port;
129     PCIDevice *dev;
130     int vector;
131     bool is_msix;
132     bool is_masked;
133     bool is_translated;
134 };
135 
136 struct XenEvtchnState {
137     /*< private >*/
138     SysBusDevice busdev;
139     /*< public >*/
140 
141     uint64_t callback_param;
142     bool evtchn_in_kernel;
143     uint32_t callback_gsi;
144 
145     QEMUBH *gsi_bh;
146 
147     QemuMutex port_lock;
148     uint32_t nr_ports;
149     XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
150     qemu_irq gsis[IOAPIC_NUM_PINS];
151 
152     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
153 
154     uint32_t nr_pirqs;
155 
156     /* Bitmap of allocated PIRQs (serialized) */
157     uint16_t nr_pirq_inuse_words;
158     uint64_t *pirq_inuse_bitmap;
159 
160     /* GSI → PIRQ mapping (serialized) */
161     uint16_t gsi_pirq[IOAPIC_NUM_PINS];
162 
163     /* Per-GSI assertion state (serialized) */
164     uint32_t pirq_gsi_set;
165 
166     /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
167     struct pirq_info *pirq;
168 };
169 
/*
 * Accessors for the allocated-PIRQ bitmap (64 PIRQs per uint64_t word).
 *
 * pirq_inuse_word() yields the (assignable) bitmap word holding @pirq,
 * pirq_inuse_bit() the bit for @pirq within that word, and pirq_inuse()
 * is non-zero when @pirq is currently allocated.
 *
 * Every macro argument is parenthesised so that arbitrary expressions
 * may be passed for both @s and @pirq.
 */
#define pirq_inuse_word(s, pirq) ((s)->pirq_inuse_bitmap[(pirq) / 64])
#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))

#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
174 
/* The one and only event channel device; NULL until xen_evtchn_create(). */
struct XenEvtchnState *xen_evtchn_singleton;
176 
/*
 * The callback type (HVM_PARAM_CALLBACK_TYPE_xxx) occupies the top bits
 * of callback_param; shift right by this amount to extract it.
 */
#define CALLBACK_VIA_TYPE_SHIFT 56
179 
180 static void unbind_backend_ports(XenEvtchnState *s);
181 
182 static int xen_evtchn_pre_load(void *opaque)
183 {
184     XenEvtchnState *s = opaque;
185 
186     /* Unbind all the backend-side ports; they need to rebind */
187     unbind_backend_ports(s);
188 
189     /* It'll be leaked otherwise. */
190     g_free(s->pirq_inuse_bitmap);
191     s->pirq_inuse_bitmap = NULL;
192 
193     return 0;
194 }
195 
196 static int xen_evtchn_post_load(void *opaque, int version_id)
197 {
198     XenEvtchnState *s = opaque;
199     uint32_t i;
200 
201     if (s->callback_param) {
202         xen_evtchn_set_callback_param(s->callback_param);
203     }
204 
205     /* Rebuild s->pirq[].port mapping */
206     for (i = 0; i < s->nr_ports; i++) {
207         XenEvtchnPort *p = &s->port_table[i];
208 
209         if (p->type == EVTCHNSTAT_pirq) {
210             assert(p->type_val);
211             assert(p->type_val < s->nr_pirqs);
212 
213             /*
214              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
215              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
216              * catches up with it.
217              */
218             s->pirq[p->type_val].gsi = IRQ_UNBOUND;
219             s->pirq[p->type_val].port = i;
220         }
221     }
222     /* Rebuild s->pirq[].gsi mapping */
223     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
224         if (s->gsi_pirq[i]) {
225             s->pirq[s->gsi_pirq[i]].gsi = i;
226         }
227     }
228     return 0;
229 }
230 
231 static bool xen_evtchn_is_needed(void *opaque)
232 {
233     return xen_mode == XEN_EMULATE;
234 }
235 
236 static const VMStateDescription xen_evtchn_port_vmstate = {
237     .name = "xen_evtchn_port",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .fields = (VMStateField[]) {
241         VMSTATE_UINT32(vcpu, XenEvtchnPort),
242         VMSTATE_UINT16(type, XenEvtchnPort),
243         VMSTATE_UINT16(type_val, XenEvtchnPort),
244         VMSTATE_END_OF_LIST()
245     }
246 };
247 
248 static const VMStateDescription xen_evtchn_vmstate = {
249     .name = "xen_evtchn",
250     .version_id = 1,
251     .minimum_version_id = 1,
252     .needed = xen_evtchn_is_needed,
253     .pre_load = xen_evtchn_pre_load,
254     .post_load = xen_evtchn_post_load,
255     .fields = (VMStateField[]) {
256         VMSTATE_UINT64(callback_param, XenEvtchnState),
257         VMSTATE_UINT32(nr_ports, XenEvtchnState),
258         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
259                                      xen_evtchn_port_vmstate, XenEvtchnPort),
260         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
261         VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
262                                     nr_pirq_inuse_words, 0,
263                                     vmstate_info_uint64, uint64_t),
264         VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
265         VMSTATE_END_OF_LIST()
266     }
267 };
268 
269 static void xen_evtchn_class_init(ObjectClass *klass, void *data)
270 {
271     DeviceClass *dc = DEVICE_CLASS(klass);
272 
273     dc->vmsd = &xen_evtchn_vmstate;
274 }
275 
276 static const TypeInfo xen_evtchn_info = {
277     .name          = TYPE_XEN_EVTCHN,
278     .parent        = TYPE_SYS_BUS_DEVICE,
279     .instance_size = sizeof(XenEvtchnState),
280     .class_init    = xen_evtchn_class_init,
281 };
282 
283 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
284     .open = xen_be_evtchn_open,
285     .bind_interdomain = xen_be_evtchn_bind_interdomain,
286     .unbind = xen_be_evtchn_unbind,
287     .close = xen_be_evtchn_close,
288     .get_fd = xen_be_evtchn_fd,
289     .notify = xen_be_evtchn_notify,
290     .unmask = xen_be_evtchn_unmask,
291     .pending = xen_be_evtchn_pending,
292 };
293 
294 static void gsi_assert_bh(void *opaque)
295 {
296     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
297     if (vi) {
298         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
299     }
300 }
301 
302 void xen_evtchn_create(void)
303 {
304     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
305                                                         -1, NULL));
306     int i;
307 
308     xen_evtchn_singleton = s;
309 
310     qemu_mutex_init(&s->port_lock);
311     s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
312 
313     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
314         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
315     }
316 
317     /*
318      * The Xen scheme for encoding PIRQ# into an MSI message is not
319      * compatible with 32-bit MSI, as it puts the high bits of the
320      * PIRQ# into the high bits of the MSI message address, instead of
321      * using the Extended Destination ID in address bits 4-11 which
322      * perhaps would have been a better choice.
323      *
324      * To keep life simple, kvm_accel_instance_init() initialises the
325      * default to 256. which conveniently doesn't need to set anything
326      * outside the low 32 bits of the address. It can be increased by
327      * setting the xen-evtchn-max-pirq property.
328      */
329     s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
330 
331     s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
332     s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
333     s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
334 
335     /* Set event channel functions for backend drivers to use */
336     xen_evtchn_ops = &emu_evtchn_backend_ops;
337 }
338 
339 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
340 {
341     XenEvtchnState *s = xen_evtchn_singleton;
342     int i;
343 
344     if (!s) {
345         return;
346     }
347 
348     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
349         sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
350     }
351 }
352 
353 static void xen_evtchn_register_types(void)
354 {
355     type_register_static(&xen_evtchn_info);
356 }
357 
358 type_init(xen_evtchn_register_types)
359 
360 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
361 {
362     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
363     uint8_t pin = param & 3;
364     uint8_t devfn = (param >> 8) & 0xff;
365     uint16_t bus = (param >> 16) & 0xffff;
366     uint16_t domain = (param >> 32) & 0xffff;
367     PCIDevice *pdev;
368     PCIINTxRoute r;
369 
370     if (domain || !pcms) {
371         return 0;
372     }
373 
374     pdev = pci_find_device(pcms->bus, bus, devfn);
375     if (!pdev) {
376         return 0;
377     }
378 
379     r = pci_device_route_intx_to_irq(pdev, pin);
380     if (r.mode != PCI_INTX_ENABLED) {
381         return 0;
382     }
383 
384     /*
385      * Hm, can we be notified of INTX routing changes? Not without
386      * *owning* the device and being allowed to overwrite its own
387      * ->intx_routing_notifier, AFAICT. So let's not.
388      */
389     return r.irq;
390 }
391 
392 void xen_evtchn_set_callback_level(int level)
393 {
394     XenEvtchnState *s = xen_evtchn_singleton;
395     if (!s) {
396         return;
397     }
398 
399     /*
400      * We get to this function in a number of ways:
401      *
402      *  • From I/O context, via PV backend drivers sending a notification to
403      *    the guest.
404      *
405      *  • From guest vCPU context, via loopback interdomain event channels
406      *    (or theoretically even IPIs but guests don't use those with GSI
407      *    delivery because that's pointless. We don't want a malicious guest
408      *    to be able to trigger a deadlock though, so we can't rule it out.)
409      *
410      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
411      *    configured.
412      *
413      *  • From guest vCPU context in the KVM exit handler, if the upcall
414      *    pending flag has been cleared and the GSI needs to be deasserted.
415      *
416      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
417      *    been acked in the irqchip.
418      *
419      * Whichever context we come from if we aren't already holding the BQL
420      * then e can't take it now, as we may already hold s->port_lock. So
421      * trigger the BH to set the IRQ for us instead of doing it immediately.
422      *
423      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
424      * will deliberately take the BQL because they want the change to take
425      * effect immediately. That just leaves interdomain loopback as the case
426      * which uses the BH.
427      */
428     if (!qemu_mutex_iothread_locked()) {
429         qemu_bh_schedule(s->gsi_bh);
430         return;
431     }
432 
433     if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) {
434         qemu_set_irq(s->gsis[s->callback_gsi], level);
435         if (level) {
436             /* Ensure the vCPU polls for deassertion */
437             kvm_xen_set_callback_asserted();
438         }
439     }
440 }
441 
442 int xen_evtchn_set_callback_param(uint64_t param)
443 {
444     XenEvtchnState *s = xen_evtchn_singleton;
445     struct kvm_xen_hvm_attr xa = {
446         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
447         .u.vector = 0,
448     };
449     bool in_kernel = false;
450     uint32_t gsi = 0;
451     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
452     int ret;
453 
454     if (!s) {
455         return -ENOTSUP;
456     }
457 
458     /*
459      * We need the BQL because set_callback_pci_intx() may call into PCI code,
460      * and because we may need to manipulate the old and new GSI levels.
461      */
462     assert(qemu_mutex_iothread_locked());
463     qemu_mutex_lock(&s->port_lock);
464 
465     switch (type) {
466     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
467         xa.u.vector = (uint8_t)param,
468 
469         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
470         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
471             in_kernel = true;
472         }
473         gsi = 0;
474         break;
475     }
476 
477     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
478         gsi = set_callback_pci_intx(s, param);
479         ret = gsi ? 0 : -EINVAL;
480         break;
481 
482     case HVM_PARAM_CALLBACK_TYPE_GSI:
483         gsi = (uint32_t)param;
484         ret = 0;
485         break;
486 
487     default:
488         /* Xen doesn't return error even if you set something bogus */
489         ret = 0;
490         break;
491     }
492 
493     if (!ret) {
494         /* If vector delivery was turned *off* then tell the kernel */
495         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
496             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
497             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
498         }
499         s->callback_param = param;
500         s->evtchn_in_kernel = in_kernel;
501 
502         if (gsi != s->callback_gsi) {
503             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
504 
505             xen_evtchn_set_callback_level(0);
506             s->callback_gsi = gsi;
507 
508             if (gsi && vi && vi->evtchn_upcall_pending) {
509                 kvm_xen_inject_vcpu_callback_vector(0, type);
510             }
511         }
512     }
513 
514     qemu_mutex_unlock(&s->port_lock);
515 
516     return ret;
517 }
518 
519 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
520 {
521     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
522 
523     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
524 }
525 
526 static void deassign_kernel_port(evtchn_port_t port)
527 {
528     struct kvm_xen_hvm_attr ha;
529     int ret;
530 
531     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
532     ha.u.evtchn.send_port = port;
533     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
534 
535     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
536     if (ret) {
537         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
538                       port, strerror(ret));
539     }
540 }
541 
542 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
543                               uint32_t vcpu_id)
544 {
545     CPUState *cpu = qemu_get_cpu(vcpu_id);
546     struct kvm_xen_hvm_attr ha;
547 
548     if (!cpu) {
549         return -ENOENT;
550     }
551 
552     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
553     ha.u.evtchn.send_port = port;
554     ha.u.evtchn.type = type;
555     ha.u.evtchn.flags = 0;
556     ha.u.evtchn.deliver.port.port = port;
557     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
558     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
559 
560     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
561 }
562 
563 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
564 {
565     struct kvm_xen_hvm_attr ha;
566 
567     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
568     ha.u.evtchn.send_port = port;
569     ha.u.evtchn.type = type;
570     ha.u.evtchn.flags = 0;
571     ha.u.evtchn.deliver.eventfd.port = 0;
572     ha.u.evtchn.deliver.eventfd.fd = fd;
573 
574     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
575 }
576 
577 static bool valid_port(evtchn_port_t port)
578 {
579     if (!port) {
580         return false;
581     }
582 
583     if (xen_is_long_mode()) {
584         return port < EVTCHN_2L_NR_CHANNELS;
585     } else {
586         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
587     }
588 }
589 
/* Return true if @vcpu identifies an existing CPU. */
static bool valid_vcpu(uint32_t vcpu)
{
    return qemu_get_cpu(vcpu) != NULL;
}
594 
595 static void unbind_backend_ports(XenEvtchnState *s)
596 {
597     XenEvtchnPort *p;
598     int i;
599 
600     for (i = 1; i < s->nr_ports; i++) {
601         p = &s->port_table[i];
602         if (p->type == EVTCHNSTAT_interdomain &&
603             (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
604             evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
605 
606             if (s->be_handles[be_port]) {
607                 /* This part will be overwritten on the load anyway. */
608                 p->type = EVTCHNSTAT_unbound;
609                 p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
610 
611                 /* Leave the backend port open and unbound too. */
612                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
613                     deassign_kernel_port(i);
614                 }
615                 s->be_handles[be_port]->guest_port = 0;
616             }
617         }
618     }
619 }
620 
621 int xen_evtchn_status_op(struct evtchn_status *status)
622 {
623     XenEvtchnState *s = xen_evtchn_singleton;
624     XenEvtchnPort *p;
625 
626     if (!s) {
627         return -ENOTSUP;
628     }
629 
630     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
631         return -ESRCH;
632     }
633 
634     if (!valid_port(status->port)) {
635         return -EINVAL;
636     }
637 
638     qemu_mutex_lock(&s->port_lock);
639 
640     p = &s->port_table[status->port];
641 
642     status->status = p->type;
643     status->vcpu = p->vcpu;
644 
645     switch (p->type) {
646     case EVTCHNSTAT_unbound:
647         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
648             status->u.unbound.dom = DOMID_QEMU;
649         } else {
650             status->u.unbound.dom = xen_domid;
651         }
652         break;
653 
654     case EVTCHNSTAT_interdomain:
655         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
656             status->u.interdomain.dom = DOMID_QEMU;
657         } else {
658             status->u.interdomain.dom = xen_domid;
659         }
660 
661         status->u.interdomain.port = p->type_val &
662             PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
663         break;
664 
665     case EVTCHNSTAT_pirq:
666         status->u.pirq = p->type_val;
667         break;
668 
669     case EVTCHNSTAT_virq:
670         status->u.virq = p->type_val;
671         break;
672     }
673 
674     qemu_mutex_unlock(&s->port_lock);
675     return 0;
676 }
677 
678 /*
679  * Never thought I'd hear myself say this, but C++ templates would be
680  * kind of nice here.
681  *
682  * template<class T> static int do_unmask_port(T *shinfo, ...);
683  */
684 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
685                              bool do_unmask, struct shared_info *shinfo,
686                              struct vcpu_info *vcpu_info)
687 {
688     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
689     typeof(shinfo->evtchn_pending[0]) mask;
690     int idx = port / bits_per_word;
691     int offset = port % bits_per_word;
692 
693     mask = 1UL << offset;
694 
695     if (idx >= bits_per_word) {
696         return -EINVAL;
697     }
698 
699     if (do_unmask) {
700         /*
701          * If this is a true unmask operation, clear the mask bit. If
702          * it was already unmasked, we have nothing further to do.
703          */
704         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
705             return 0;
706         }
707     } else {
708         /*
709          * This is a pseudo-unmask for affinity changes. We don't
710          * change the mask bit, and if it's *masked* we have nothing
711          * else to do.
712          */
713         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
714             return 0;
715         }
716     }
717 
718     /* If the event was not pending, we're done. */
719     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
720         return 0;
721     }
722 
723     /* Now on to the vcpu_info evtchn_pending_sel index... */
724     mask = 1UL << idx;
725 
726     /* If a port in this word was already pending for this vCPU, all done. */
727     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
728         return 0;
729     }
730 
731     /* Set evtchn_upcall_pending for this vCPU */
732     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
733         return 0;
734     }
735 
736     inject_callback(s, s->port_table[port].vcpu);
737 
738     return 0;
739 }
740 
741 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
742                                  bool do_unmask,
743                                  struct compat_shared_info *shinfo,
744                                  struct compat_vcpu_info *vcpu_info)
745 {
746     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
747     typeof(shinfo->evtchn_pending[0]) mask;
748     int idx = port / bits_per_word;
749     int offset = port % bits_per_word;
750 
751     mask = 1UL << offset;
752 
753     if (idx >= bits_per_word) {
754         return -EINVAL;
755     }
756 
757     if (do_unmask) {
758         /*
759          * If this is a true unmask operation, clear the mask bit. If
760          * it was already unmasked, we have nothing further to do.
761          */
762         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
763             return 0;
764         }
765     } else {
766         /*
767          * This is a pseudo-unmask for affinity changes. We don't
768          * change the mask bit, and if it's *masked* we have nothing
769          * else to do.
770          */
771         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
772             return 0;
773         }
774     }
775 
776     /* If the event was not pending, we're done. */
777     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
778         return 0;
779     }
780 
781     /* Now on to the vcpu_info evtchn_pending_sel index... */
782     mask = 1UL << idx;
783 
784     /* If a port in this word was already pending for this vCPU, all done. */
785     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
786         return 0;
787     }
788 
789     /* Set evtchn_upcall_pending for this vCPU */
790     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
791         return 0;
792     }
793 
794     inject_callback(s, s->port_table[port].vcpu);
795 
796     return 0;
797 }
798 
799 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
800 {
801     void *vcpu_info, *shinfo;
802 
803     if (s->port_table[port].type == EVTCHNSTAT_closed) {
804         return -EINVAL;
805     }
806 
807     shinfo = xen_overlay_get_shinfo_ptr();
808     if (!shinfo) {
809         return -ENOTSUP;
810     }
811 
812     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
813     if (!vcpu_info) {
814         return -EINVAL;
815     }
816 
817     if (xen_is_long_mode()) {
818         return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
819     } else {
820         return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
821     }
822 }
823 
824 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
825                           struct shared_info *shinfo,
826                           struct vcpu_info *vcpu_info)
827 {
828     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
829     typeof(shinfo->evtchn_pending[0]) mask;
830     int idx = port / bits_per_word;
831     int offset = port % bits_per_word;
832 
833     mask = 1UL << offset;
834 
835     if (idx >= bits_per_word) {
836         return -EINVAL;
837     }
838 
839     /* Update the pending bit itself. If it was already set, we're done. */
840     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
841         return 0;
842     }
843 
844     /* Check if it's masked. */
845     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
846         return 0;
847     }
848 
849     /* Now on to the vcpu_info evtchn_pending_sel index... */
850     mask = 1UL << idx;
851 
852     /* If a port in this word was already pending for this vCPU, all done. */
853     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
854         return 0;
855     }
856 
857     /* Set evtchn_upcall_pending for this vCPU */
858     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
859         return 0;
860     }
861 
862     inject_callback(s, s->port_table[port].vcpu);
863 
864     return 0;
865 }
866 
867 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
868                               struct compat_shared_info *shinfo,
869                               struct compat_vcpu_info *vcpu_info)
870 {
871     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
872     typeof(shinfo->evtchn_pending[0]) mask;
873     int idx = port / bits_per_word;
874     int offset = port % bits_per_word;
875 
876     mask = 1UL << offset;
877 
878     if (idx >= bits_per_word) {
879         return -EINVAL;
880     }
881 
882     /* Update the pending bit itself. If it was already set, we're done. */
883     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
884         return 0;
885     }
886 
887     /* Check if it's masked. */
888     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
889         return 0;
890     }
891 
892     /* Now on to the vcpu_info evtchn_pending_sel index... */
893     mask = 1UL << idx;
894 
895     /* If a port in this word was already pending for this vCPU, all done. */
896     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
897         return 0;
898     }
899 
900     /* Set evtchn_upcall_pending for this vCPU */
901     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
902         return 0;
903     }
904 
905     inject_callback(s, s->port_table[port].vcpu);
906 
907     return 0;
908 }
909 
910 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
911 {
912     void *vcpu_info, *shinfo;
913 
914     if (s->port_table[port].type == EVTCHNSTAT_closed) {
915         return -EINVAL;
916     }
917 
918     if (s->evtchn_in_kernel) {
919         XenEvtchnPort *p = &s->port_table[port];
920         CPUState *cpu = qemu_get_cpu(p->vcpu);
921         struct kvm_irq_routing_xen_evtchn evt;
922 
923         if (!cpu) {
924             return 0;
925         }
926 
927         evt.port = port;
928         evt.vcpu = kvm_arch_vcpu_id(cpu);
929         evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
930 
931         return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
932     }
933 
934     shinfo = xen_overlay_get_shinfo_ptr();
935     if (!shinfo) {
936         return -ENOTSUP;
937     }
938 
939     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
940     if (!vcpu_info) {
941         return -EINVAL;
942     }
943 
944     if (xen_is_long_mode()) {
945         return do_set_port_lm(s, port, shinfo, vcpu_info);
946     } else {
947         return do_set_port_compat(s, port, shinfo, vcpu_info);
948     }
949 }
950 
951 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
952 {
953     void *p = xen_overlay_get_shinfo_ptr();
954 
955     if (!p) {
956         return -ENOTSUP;
957     }
958 
959     if (xen_is_long_mode()) {
960         struct shared_info *shinfo = p;
961         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
962         typeof(shinfo->evtchn_pending[0]) mask;
963         int idx = port / bits_per_word;
964         int offset = port % bits_per_word;
965 
966         mask = 1UL << offset;
967 
968         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
969     } else {
970         struct compat_shared_info *shinfo = p;
971         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
972         typeof(shinfo->evtchn_pending[0]) mask;
973         int idx = port / bits_per_word;
974         int offset = port % bits_per_word;
975 
976         mask = 1UL << offset;
977 
978         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
979     }
980     return 0;
981 }
982 
983 static void free_port(XenEvtchnState *s, evtchn_port_t port)
984 {
985     s->port_table[port].type = EVTCHNSTAT_closed;
986     s->port_table[port].type_val = 0;
987     s->port_table[port].vcpu = 0;
988 
989     if (s->nr_ports == port + 1) {
990         do {
991             s->nr_ports--;
992         } while (s->nr_ports &&
993                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
994     }
995 
996     /* Clear pending event to avoid unexpected behavior on re-bind. */
997     clear_port_pending(s, port);
998 }
999 
1000 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1001                          uint16_t val, evtchn_port_t *port)
1002 {
1003     evtchn_port_t p = 1;
1004 
1005     for (p = 1; valid_port(p); p++) {
1006         if (s->port_table[p].type == EVTCHNSTAT_closed) {
1007             s->port_table[p].vcpu = vcpu;
1008             s->port_table[p].type = type;
1009             s->port_table[p].type_val = val;
1010 
1011             *port = p;
1012 
1013             if (s->nr_ports < p + 1) {
1014                 s->nr_ports = p + 1;
1015             }
1016 
1017             return 0;
1018         }
1019     }
1020     return -ENOSPC;
1021 }
1022 
1023 static bool virq_is_global(uint32_t virq)
1024 {
1025     switch (virq) {
1026     case VIRQ_TIMER:
1027     case VIRQ_DEBUG:
1028     case VIRQ_XENOPROF:
1029     case VIRQ_XENPMU:
1030         return false;
1031 
1032     default:
1033         return true;
1034     }
1035 }
1036 
1037 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1038                       bool *flush_kvm_routes)
1039 {
1040     XenEvtchnPort *p = &s->port_table[port];
1041 
1042     /* Because it *might* be a PIRQ port */
1043     assert(qemu_mutex_iothread_locked());
1044 
1045     switch (p->type) {
1046     case EVTCHNSTAT_closed:
1047         return -ENOENT;
1048 
1049     case EVTCHNSTAT_pirq:
1050         s->pirq[p->type_val].port = 0;
1051         if (s->pirq[p->type_val].is_translated) {
1052             *flush_kvm_routes = true;
1053         }
1054         break;
1055 
1056     case EVTCHNSTAT_virq:
1057         kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
1058                               p->type_val, 0);
1059         break;
1060 
1061     case EVTCHNSTAT_ipi:
1062         if (s->evtchn_in_kernel) {
1063             deassign_kernel_port(port);
1064         }
1065         break;
1066 
1067     case EVTCHNSTAT_interdomain:
1068         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1069             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1070             struct xenevtchn_handle *xc = s->be_handles[be_port];
1071             if (xc) {
1072                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1073                     deassign_kernel_port(port);
1074                 }
1075                 xc->guest_port = 0;
1076             }
1077         } else {
1078             /* Loopback interdomain */
1079             XenEvtchnPort *rp = &s->port_table[p->type_val];
1080             if (!valid_port(p->type_val) || rp->type_val != port ||
1081                 rp->type != EVTCHNSTAT_interdomain) {
1082                 error_report("Inconsistent state for interdomain unbind");
1083             } else {
1084                 /* Set the other end back to unbound */
1085                 rp->type = EVTCHNSTAT_unbound;
1086                 rp->type_val = 0;
1087             }
1088         }
1089         break;
1090 
1091     default:
1092         break;
1093     }
1094 
1095     free_port(s, port);
1096     return 0;
1097 }
1098 
1099 int xen_evtchn_soft_reset(void)
1100 {
1101     XenEvtchnState *s = xen_evtchn_singleton;
1102     bool flush_kvm_routes;
1103     int i;
1104 
1105     if (!s) {
1106         return -ENOTSUP;
1107     }
1108 
1109     assert(qemu_mutex_iothread_locked());
1110 
1111     qemu_mutex_lock(&s->port_lock);
1112 
1113     for (i = 0; i < s->nr_ports; i++) {
1114         close_port(s, i, &flush_kvm_routes);
1115     }
1116 
1117     qemu_mutex_unlock(&s->port_lock);
1118 
1119     if (flush_kvm_routes) {
1120         kvm_update_msi_routes_all(NULL, true, 0, 0);
1121     }
1122 
1123     return 0;
1124 }
1125 
1126 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1127 {
1128     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1129         return -ESRCH;
1130     }
1131 
1132     return xen_evtchn_soft_reset();
1133 }
1134 
1135 int xen_evtchn_close_op(struct evtchn_close *close)
1136 {
1137     XenEvtchnState *s = xen_evtchn_singleton;
1138     bool flush_kvm_routes = false;
1139     int ret;
1140 
1141     if (!s) {
1142         return -ENOTSUP;
1143     }
1144 
1145     if (!valid_port(close->port)) {
1146         return -EINVAL;
1147     }
1148 
1149     QEMU_IOTHREAD_LOCK_GUARD();
1150     qemu_mutex_lock(&s->port_lock);
1151 
1152     ret = close_port(s, close->port, &flush_kvm_routes);
1153 
1154     qemu_mutex_unlock(&s->port_lock);
1155 
1156     if (flush_kvm_routes) {
1157         kvm_update_msi_routes_all(NULL, true, 0, 0);
1158     }
1159 
1160     return ret;
1161 }
1162 
1163 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1164 {
1165     XenEvtchnState *s = xen_evtchn_singleton;
1166     int ret;
1167 
1168     if (!s) {
1169         return -ENOTSUP;
1170     }
1171 
1172     if (!valid_port(unmask->port)) {
1173         return -EINVAL;
1174     }
1175 
1176     qemu_mutex_lock(&s->port_lock);
1177 
1178     ret = unmask_port(s, unmask->port, true);
1179 
1180     qemu_mutex_unlock(&s->port_lock);
1181 
1182     return ret;
1183 }
1184 
1185 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1186 {
1187     XenEvtchnState *s = xen_evtchn_singleton;
1188     XenEvtchnPort *p;
1189     int ret = -EINVAL;
1190 
1191     if (!s) {
1192         return -ENOTSUP;
1193     }
1194 
1195     if (!valid_port(vcpu->port)) {
1196         return -EINVAL;
1197     }
1198 
1199     if (!valid_vcpu(vcpu->vcpu)) {
1200         return -ENOENT;
1201     }
1202 
1203     qemu_mutex_lock(&s->port_lock);
1204 
1205     p = &s->port_table[vcpu->port];
1206 
1207     if (p->type == EVTCHNSTAT_interdomain ||
1208         p->type == EVTCHNSTAT_unbound ||
1209         p->type == EVTCHNSTAT_pirq ||
1210         (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
1211         /*
1212          * unmask_port() with do_unmask==false will just raise the event
1213          * on the new vCPU if the port was already pending.
1214          */
1215         p->vcpu = vcpu->vcpu;
1216         unmask_port(s, vcpu->port, false);
1217         ret = 0;
1218     }
1219 
1220     qemu_mutex_unlock(&s->port_lock);
1221 
1222     return ret;
1223 }
1224 
1225 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1226 {
1227     XenEvtchnState *s = xen_evtchn_singleton;
1228     int ret;
1229 
1230     if (!s) {
1231         return -ENOTSUP;
1232     }
1233 
1234     if (virq->virq >= NR_VIRQS) {
1235         return -EINVAL;
1236     }
1237 
1238     /* Global VIRQ must be allocated on vCPU0 first */
1239     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1240         return -EINVAL;
1241     }
1242 
1243     if (!valid_vcpu(virq->vcpu)) {
1244         return -ENOENT;
1245     }
1246 
1247     qemu_mutex_lock(&s->port_lock);
1248 
1249     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1250                         &virq->port);
1251     if (!ret) {
1252         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1253         if (ret) {
1254             free_port(s, virq->port);
1255         }
1256     }
1257 
1258     qemu_mutex_unlock(&s->port_lock);
1259 
1260     return ret;
1261 }
1262 
1263 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1264 {
1265     XenEvtchnState *s = xen_evtchn_singleton;
1266     int ret;
1267 
1268     if (!s) {
1269         return -ENOTSUP;
1270     }
1271 
1272     if (pirq->pirq >= s->nr_pirqs) {
1273         return -EINVAL;
1274     }
1275 
1276     QEMU_IOTHREAD_LOCK_GUARD();
1277 
1278     if (s->pirq[pirq->pirq].port) {
1279         return -EBUSY;
1280     }
1281 
1282     qemu_mutex_lock(&s->port_lock);
1283 
1284     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1285                         &pirq->port);
1286     if (ret) {
1287         qemu_mutex_unlock(&s->port_lock);
1288         return ret;
1289     }
1290 
1291     s->pirq[pirq->pirq].port = pirq->port;
1292     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1293 
1294     qemu_mutex_unlock(&s->port_lock);
1295 
1296     /*
1297      * Need to do the unmask outside port_lock because it may call
1298      * back into the MSI translate function.
1299      */
1300     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1301         if (s->pirq[pirq->pirq].is_masked) {
1302             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1303             int vector = s->pirq[pirq->pirq].vector;
1304             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1305 
1306             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1307             g_free(dev_path);
1308 
1309             if (s->pirq[pirq->pirq].is_msix) {
1310                 msix_set_mask(dev, vector, false);
1311             } else {
1312                 msi_set_mask(dev, vector, false, NULL);
1313             }
1314         } else if (s->pirq[pirq->pirq].is_translated) {
1315             /*
1316              * If KVM had attempted to translate this one before, make it try
1317              * again. If we unmasked, then the notifier on the MSI(-X) vector
1318              * will already have had the same effect.
1319              */
1320             kvm_update_msi_routes_all(NULL, true, 0, 0);
1321         }
1322     }
1323 
1324     return ret;
1325 }
1326 
1327 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1328 {
1329     XenEvtchnState *s = xen_evtchn_singleton;
1330     int ret;
1331 
1332     if (!s) {
1333         return -ENOTSUP;
1334     }
1335 
1336     if (!valid_vcpu(ipi->vcpu)) {
1337         return -ENOENT;
1338     }
1339 
1340     qemu_mutex_lock(&s->port_lock);
1341 
1342     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1343     if (!ret && s->evtchn_in_kernel) {
1344         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1345     }
1346 
1347     qemu_mutex_unlock(&s->port_lock);
1348 
1349     return ret;
1350 }
1351 
1352 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1353 {
1354     XenEvtchnState *s = xen_evtchn_singleton;
1355     uint16_t type_val;
1356     int ret;
1357 
1358     if (!s) {
1359         return -ENOTSUP;
1360     }
1361 
1362     if (interdomain->remote_dom == DOMID_QEMU) {
1363         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1364     } else if (interdomain->remote_dom == DOMID_SELF ||
1365                interdomain->remote_dom == xen_domid) {
1366         type_val = 0;
1367     } else {
1368         return -ESRCH;
1369     }
1370 
1371     if (!valid_port(interdomain->remote_port)) {
1372         return -EINVAL;
1373     }
1374 
1375     qemu_mutex_lock(&s->port_lock);
1376 
1377     /* The newly allocated port starts out as unbound */
1378     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
1379                         &interdomain->local_port);
1380     if (ret) {
1381         goto out;
1382     }
1383 
1384     if (interdomain->remote_dom == DOMID_QEMU) {
1385         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1386         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1387 
1388         if (!xc) {
1389             ret = -ENOENT;
1390             goto out_free_port;
1391         }
1392 
1393         if (xc->guest_port) {
1394             ret = -EBUSY;
1395             goto out_free_port;
1396         }
1397 
1398         assert(xc->be_port == interdomain->remote_port);
1399         xc->guest_port = interdomain->local_port;
1400         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1401             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1402         }
1403         lp->type = EVTCHNSTAT_interdomain;
1404         lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
1405         ret = 0;
1406     } else {
1407         /* Loopback */
1408         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1409         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1410 
1411         if (rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {
1412             /* It's a match! */
1413             rp->type = EVTCHNSTAT_interdomain;
1414             rp->type_val = interdomain->local_port;
1415 
1416             lp->type = EVTCHNSTAT_interdomain;
1417             lp->type_val = interdomain->remote_port;
1418         } else {
1419             ret = -EINVAL;
1420         }
1421     }
1422 
1423  out_free_port:
1424     if (ret) {
1425         free_port(s, interdomain->local_port);
1426     }
1427  out:
1428     qemu_mutex_unlock(&s->port_lock);
1429 
1430     return ret;
1431 
1432 }
1433 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1434 {
1435     XenEvtchnState *s = xen_evtchn_singleton;
1436     uint16_t type_val;
1437     int ret;
1438 
1439     if (!s) {
1440         return -ENOTSUP;
1441     }
1442 
1443     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1444         return -ESRCH;
1445     }
1446 
1447     if (alloc->remote_dom == DOMID_QEMU) {
1448         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1449     } else if (alloc->remote_dom == DOMID_SELF ||
1450                alloc->remote_dom == xen_domid) {
1451         type_val = 0;
1452     } else {
1453         return -EPERM;
1454     }
1455 
1456     qemu_mutex_lock(&s->port_lock);
1457 
1458     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
1459 
1460     qemu_mutex_unlock(&s->port_lock);
1461 
1462     return ret;
1463 }
1464 
1465 int xen_evtchn_send_op(struct evtchn_send *send)
1466 {
1467     XenEvtchnState *s = xen_evtchn_singleton;
1468     XenEvtchnPort *p;
1469     int ret = 0;
1470 
1471     if (!s) {
1472         return -ENOTSUP;
1473     }
1474 
1475     if (!valid_port(send->port)) {
1476         return -EINVAL;
1477     }
1478 
1479     qemu_mutex_lock(&s->port_lock);
1480 
1481     p = &s->port_table[send->port];
1482 
1483     switch (p->type) {
1484     case EVTCHNSTAT_interdomain:
1485         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1486             /*
1487              * This is an event from the guest to qemu itself, which is
1488              * serving as the driver domain.
1489              */
1490             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1491             struct xenevtchn_handle *xc = s->be_handles[be_port];
1492             if (xc) {
1493                 eventfd_write(xc->fd, 1);
1494                 ret = 0;
1495             } else {
1496                 ret = -ENOENT;
1497             }
1498         } else {
1499             /* Loopback interdomain ports; just a complex IPI */
1500             set_port_pending(s, p->type_val);
1501         }
1502         break;
1503 
1504     case EVTCHNSTAT_ipi:
1505         set_port_pending(s, send->port);
1506         break;
1507 
1508     case EVTCHNSTAT_unbound:
1509         /* Xen will silently drop these */
1510         break;
1511 
1512     default:
1513         ret = -EINVAL;
1514         break;
1515     }
1516 
1517     qemu_mutex_unlock(&s->port_lock);
1518 
1519     return ret;
1520 }
1521 
1522 int xen_evtchn_set_port(uint16_t port)
1523 {
1524     XenEvtchnState *s = xen_evtchn_singleton;
1525     XenEvtchnPort *p;
1526     int ret = -EINVAL;
1527 
1528     if (!s) {
1529         return -ENOTSUP;
1530     }
1531 
1532     if (!valid_port(port)) {
1533         return -EINVAL;
1534     }
1535 
1536     qemu_mutex_lock(&s->port_lock);
1537 
1538     p = &s->port_table[port];
1539 
1540     /* QEMU has no business sending to anything but these */
1541     if (p->type == EVTCHNSTAT_virq ||
1542         (p->type == EVTCHNSTAT_interdomain &&
1543          (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
1544         set_port_pending(s, port);
1545         ret = 0;
1546     }
1547 
1548     qemu_mutex_unlock(&s->port_lock);
1549 
1550     return ret;
1551 }
1552 
1553 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1554 {
1555     uint16_t pirq;
1556 
1557     /*
1558      * Preserve the allocation strategy that Xen has. It looks like
1559      * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1560      * to GSIs (counting up from 16), and then we count backwards from
1561      * the top for MSIs or when the GSI space is exhausted.
1562      */
1563     if (type == MAP_PIRQ_TYPE_GSI) {
1564         for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1565             if (pirq_inuse(s, pirq)) {
1566                 continue;
1567             }
1568 
1569             /* Found it */
1570             goto found;
1571         }
1572     }
1573     for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1574         /* Skip whole words at a time when they're full */
1575         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1576             pirq &= ~63ULL;
1577             continue;
1578         }
1579         if (pirq_inuse(s, pirq)) {
1580             continue;
1581         }
1582 
1583         goto found;
1584     }
1585     return -ENOSPC;
1586 
1587  found:
1588     pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1589     if (gsi >= 0) {
1590         assert(gsi <= IOAPIC_NUM_PINS);
1591         s->gsi_pirq[gsi] = pirq;
1592     }
1593     s->pirq[pirq].gsi = gsi;
1594     return pirq;
1595 }
1596 
1597 bool xen_evtchn_set_gsi(int gsi, int level)
1598 {
1599     XenEvtchnState *s = xen_evtchn_singleton;
1600     int pirq;
1601 
1602     assert(qemu_mutex_iothread_locked());
1603 
1604     if (!s || gsi < 0 || gsi > IOAPIC_NUM_PINS) {
1605         return false;
1606     }
1607 
1608     /*
1609      * Check that that it *isn't* the event channel GSI, and thus
1610      * that we are not recursing and it's safe to take s->port_lock.
1611      *
1612      * Locking aside, it's perfectly sane to bail out early for that
1613      * special case, as it would make no sense for the event channel
1614      * GSI to be routed back to event channels, when the delivery
1615      * method is to raise the GSI... that recursion wouldn't *just*
1616      * be a locking issue.
1617      */
1618     if (gsi && gsi == s->callback_gsi) {
1619         return false;
1620     }
1621 
1622     QEMU_LOCK_GUARD(&s->port_lock);
1623 
1624     pirq = s->gsi_pirq[gsi];
1625     if (!pirq) {
1626         return false;
1627     }
1628 
1629     if (level) {
1630         int port = s->pirq[pirq].port;
1631 
1632         s->pirq_gsi_set |= (1U << gsi);
1633         if (port) {
1634             set_port_pending(s, port);
1635         }
1636     } else {
1637         s->pirq_gsi_set &= ~(1U << gsi);
1638     }
1639     return true;
1640 }
1641 
/*
 * Decode the PIRQ number carried by an MSI address/data pair.
 *
 * The low 8 bits of @data must be zero, otherwise 0 is returned. The PIRQ
 * is assembled from bits 12-19 of @addr (its low 8 bits) and bits 40-63 of
 * @addr (its bits 8-31).
 */
static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
{
    uint32_t low_part;
    uint32_t high_part;

    /* The vector (in low 8 bits of data) must be zero */
    if ((data & 0xff) != 0) {
        return 0;
    }

    low_part = (addr >> 12) & 0xff;
    high_part = (addr >> 32) & 0xffffff00;

    return high_part | low_part;
}
1654 
1655 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1656                                  int except_pirq)
1657 {
1658     uint32_t pirq;
1659 
1660     for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1661         /*
1662          * We could be cleverer here, but it isn't really a fast path, and
1663          * this trivial optimisation is enough to let us skip the big gap
1664          * in the middle a bit quicker (in terms of both loop iterations,
1665          * and cache lines).
1666          */
1667         if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1668             pirq += 64;
1669             continue;
1670         }
1671         if (except_pirq && pirq == except_pirq) {
1672             continue;
1673         }
1674         if (s->pirq[pirq].dev != dev) {
1675             continue;
1676         }
1677         if (vector != -1 && s->pirq[pirq].vector != vector) {
1678             continue;
1679         }
1680 
1681         /* It could theoretically be bound to a port already, but that is OK. */
1682         s->pirq[pirq].dev = dev;
1683         s->pirq[pirq].gsi = IRQ_UNBOUND;
1684         s->pirq[pirq].is_msix = false;
1685         s->pirq[pirq].vector = 0;
1686         s->pirq[pirq].is_masked = false;
1687         s->pirq[pirq].is_translated = false;
1688     }
1689 }
1690 
1691 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1692 {
1693     XenEvtchnState *s = xen_evtchn_singleton;
1694 
1695     if (!s) {
1696         return;
1697     }
1698 
1699     QEMU_LOCK_GUARD(&s->port_lock);
1700     do_remove_pci_vector(s, dev, -1, 0);
1701 }
1702 
1703 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1704                           uint64_t addr, uint32_t data, bool is_masked)
1705 {
1706     XenEvtchnState *s = xen_evtchn_singleton;
1707     uint32_t pirq;
1708 
1709     if (!s) {
1710         return;
1711     }
1712 
1713     assert(qemu_mutex_iothread_locked());
1714 
1715     pirq = msi_pirq_target(addr, data);
1716 
1717     /*
1718      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1719      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1720      */
1721     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1722         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1723          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1724         pirq = 0;
1725     }
1726 
1727     if (pirq) {
1728         s->pirq[pirq].dev = dev;
1729         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1730         s->pirq[pirq].is_msix = is_msix;
1731         s->pirq[pirq].vector = vector;
1732         s->pirq[pirq].is_masked = is_masked;
1733     }
1734 
1735     /* Remove any (other) entries for this {device, vector} */
1736     do_remove_pci_vector(s, dev, vector, pirq);
1737 }
1738 
1739 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1740                                   uint64_t address, uint32_t data)
1741 {
1742     XenEvtchnState *s = xen_evtchn_singleton;
1743     uint32_t pirq, port;
1744     CPUState *cpu;
1745 
1746     if (!s) {
1747         return 1; /* Not a PIRQ */
1748     }
1749 
1750     assert(qemu_mutex_iothread_locked());
1751 
1752     pirq = msi_pirq_target(address, data);
1753     if (!pirq || pirq >= s->nr_pirqs) {
1754         return 1; /* Not a PIRQ */
1755     }
1756 
1757     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1758         return -ENOTSUP;
1759     }
1760 
1761     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1762         return -EINVAL;
1763     }
1764 
1765     /* Remember that KVM tried to translate this. It might need to try again. */
1766     s->pirq[pirq].is_translated = true;
1767 
1768     QEMU_LOCK_GUARD(&s->port_lock);
1769 
1770     port = s->pirq[pirq].port;
1771     if (!valid_port(port)) {
1772         return -EINVAL;
1773     }
1774 
1775     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1776     if (!cpu) {
1777         return -EINVAL;
1778     }
1779 
1780     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1781     route->u.xen_evtchn.port = port;
1782     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1783     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1784 
1785     return 0; /* Handled */
1786 }
1787 
1788 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1789 {
1790     XenEvtchnState *s = xen_evtchn_singleton;
1791     uint32_t pirq, port;
1792 
1793     if (!s) {
1794         return false;
1795     }
1796 
1797     assert(qemu_mutex_iothread_locked());
1798 
1799     pirq = msi_pirq_target(address, data);
1800     if (!pirq || pirq >= s->nr_pirqs) {
1801         return false;
1802     }
1803 
1804     QEMU_LOCK_GUARD(&s->port_lock);
1805 
1806     port = s->pirq[pirq].port;
1807     if (!valid_port(port)) {
1808         return false;
1809     }
1810 
1811     set_port_pending(s, port);
1812     return true;
1813 }
1814 
1815 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1816 {
1817     XenEvtchnState *s = xen_evtchn_singleton;
1818     int pirq = map->pirq;
1819     int gsi = map->index;
1820 
1821     if (!s) {
1822         return -ENOTSUP;
1823     }
1824 
1825     QEMU_IOTHREAD_LOCK_GUARD();
1826     QEMU_LOCK_GUARD(&s->port_lock);
1827 
1828     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1829         return -EPERM;
1830     }
1831     if (map->type != MAP_PIRQ_TYPE_GSI) {
1832         return -EINVAL;
1833     }
1834     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1835         return -EINVAL;
1836     }
1837 
1838     if (pirq < 0) {
1839         pirq = allocate_pirq(s, map->type, gsi);
1840         if (pirq < 0) {
1841             return pirq;
1842         }
1843         map->pirq = pirq;
1844     } else if (pirq > s->nr_pirqs) {
1845         return -EINVAL;
1846     } else {
1847         /*
1848          * User specified a valid-looking PIRQ#. Allow it if it is
1849          * allocated and not yet bound, or if it is unallocated
1850          */
1851         if (pirq_inuse(s, pirq)) {
1852             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1853                 return -EBUSY;
1854             }
1855         } else {
1856             /* If it was unused, mark it used now. */
1857             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1858         }
1859         /* Set the mapping in both directions. */
1860         s->pirq[pirq].gsi = gsi;
1861         s->gsi_pirq[gsi] = pirq;
1862     }
1863 
1864     trace_kvm_xen_map_pirq(pirq, gsi);
1865     return 0;
1866 }
1867 
1868 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1869 {
1870     XenEvtchnState *s = xen_evtchn_singleton;
1871     int pirq = unmap->pirq;
1872     int gsi;
1873 
1874     if (!s) {
1875         return -ENOTSUP;
1876     }
1877 
1878     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1879         return -EPERM;
1880     }
1881     if (pirq < 0 || pirq >= s->nr_pirqs) {
1882         return -EINVAL;
1883     }
1884 
1885     QEMU_IOTHREAD_LOCK_GUARD();
1886     qemu_mutex_lock(&s->port_lock);
1887 
1888     if (!pirq_inuse(s, pirq)) {
1889         qemu_mutex_unlock(&s->port_lock);
1890         return -ENOENT;
1891     }
1892 
1893     gsi = s->pirq[pirq].gsi;
1894 
1895     /* We can only unmap GSI PIRQs */
1896     if (gsi < 0) {
1897         qemu_mutex_unlock(&s->port_lock);
1898         return -EINVAL;
1899     }
1900 
1901     s->gsi_pirq[gsi] = 0;
1902     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1903     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1904 
1905     trace_kvm_xen_unmap_pirq(pirq, gsi);
1906     qemu_mutex_unlock(&s->port_lock);
1907 
1908     if (gsi == IRQ_MSI_EMU) {
1909         kvm_update_msi_routes_all(NULL, true, 0, 0);
1910     }
1911 
1912     return 0;
1913 }
1914 
1915 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1916 {
1917     XenEvtchnState *s = xen_evtchn_singleton;
1918     int pirq = eoi->irq;
1919     int gsi;
1920 
1921     if (!s) {
1922         return -ENOTSUP;
1923     }
1924 
1925     QEMU_IOTHREAD_LOCK_GUARD();
1926     QEMU_LOCK_GUARD(&s->port_lock);
1927 
1928     if (!pirq_inuse(s, pirq)) {
1929         return -ENOENT;
1930     }
1931 
1932     gsi = s->pirq[pirq].gsi;
1933     if (gsi < 0) {
1934         return -EINVAL;
1935     }
1936 
1937     /* Reassert a level IRQ if needed */
1938     if (s->pirq_gsi_set & (1U << gsi)) {
1939         int port = s->pirq[pirq].port;
1940         if (port) {
1941             set_port_pending(s, port);
1942         }
1943     }
1944 
1945     return 0;
1946 }
1947 
1948 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1949 {
1950     XenEvtchnState *s = xen_evtchn_singleton;
1951     int pirq = query->irq;
1952 
1953     if (!s) {
1954         return -ENOTSUP;
1955     }
1956 
1957     QEMU_IOTHREAD_LOCK_GUARD();
1958     QEMU_LOCK_GUARD(&s->port_lock);
1959 
1960     if (!pirq_inuse(s, pirq)) {
1961         return -ENOENT;
1962     }
1963 
1964     if (s->pirq[pirq].gsi >= 0) {
1965         query->flags = XENIRQSTAT_needs_eoi;
1966     } else {
1967         query->flags = 0;
1968     }
1969 
1970     return 0;
1971 }
1972 
1973 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1974 {
1975     XenEvtchnState *s = xen_evtchn_singleton;
1976     int pirq;
1977 
1978     if (!s) {
1979         return -ENOTSUP;
1980     }
1981 
1982     QEMU_LOCK_GUARD(&s->port_lock);
1983 
1984     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1985     if (pirq < 0) {
1986         return pirq;
1987     }
1988 
1989     get->pirq = pirq;
1990     trace_kvm_xen_get_free_pirq(pirq, get->type);
1991     return 0;
1992 }
1993 
1994 struct xenevtchn_handle *xen_be_evtchn_open(void)
1995 {
1996     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
1997 
1998     xc->fd = eventfd(0, EFD_CLOEXEC);
1999     if (xc->fd < 0) {
2000         free(xc);
2001         return NULL;
2002     }
2003 
2004     return xc;
2005 }
2006 
2007 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2008 {
2009     int i;
2010 
2011     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2012         if (!s->be_handles[i]) {
2013             s->be_handles[i] = xc;
2014             xc->be_port = i;
2015             return i;
2016         }
2017     }
2018     return 0;
2019 }
2020 
2021 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2022                                    evtchn_port_t guest_port)
2023 {
2024     XenEvtchnState *s = xen_evtchn_singleton;
2025     XenEvtchnPort *gp;
2026     uint16_t be_port = 0;
2027     int ret;
2028 
2029     if (!s) {
2030         return -ENOTSUP;
2031     }
2032 
2033     if (!xc) {
2034         return -EFAULT;
2035     }
2036 
2037     if (domid != xen_domid) {
2038         return -ESRCH;
2039     }
2040 
2041     if (!valid_port(guest_port)) {
2042         return -EINVAL;
2043     }
2044 
2045     qemu_mutex_lock(&s->port_lock);
2046 
2047     /* The guest has to have an unbound port waiting for us to bind */
2048     gp = &s->port_table[guest_port];
2049 
2050     switch (gp->type) {
2051     case EVTCHNSTAT_interdomain:
2052         /* Allow rebinding after migration, preserve port # if possible */
2053         be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
2054         assert(be_port != 0);
2055         if (!s->be_handles[be_port]) {
2056             s->be_handles[be_port] = xc;
2057             xc->guest_port = guest_port;
2058             ret = xc->be_port = be_port;
2059             if (kvm_xen_has_cap(EVTCHN_SEND)) {
2060                 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2061             }
2062             break;
2063         }
2064         /* fall through */
2065 
2066     case EVTCHNSTAT_unbound:
2067         be_port = find_be_port(s, xc);
2068         if (!be_port) {
2069             ret = -ENOSPC;
2070             goto out;
2071         }
2072 
2073         gp->type = EVTCHNSTAT_interdomain;
2074         gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
2075         xc->guest_port = guest_port;
2076         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2077             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2078         }
2079         ret = be_port;
2080         break;
2081 
2082     default:
2083         ret = -EINVAL;
2084         break;
2085     }
2086 
2087  out:
2088     qemu_mutex_unlock(&s->port_lock);
2089 
2090     return ret;
2091 }
2092 
2093 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2094 {
2095     XenEvtchnState *s = xen_evtchn_singleton;
2096     int ret;
2097 
2098     if (!s) {
2099         return -ENOTSUP;
2100     }
2101 
2102     if (!xc) {
2103         return -EFAULT;
2104     }
2105 
2106     qemu_mutex_lock(&s->port_lock);
2107 
2108     if (port && port != xc->be_port) {
2109         ret = -EINVAL;
2110         goto out;
2111     }
2112 
2113     if (xc->guest_port) {
2114         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2115 
2116         /* This should never *not* be true */
2117         if (gp->type == EVTCHNSTAT_interdomain) {
2118             gp->type = EVTCHNSTAT_unbound;
2119             gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
2120         }
2121 
2122         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2123             deassign_kernel_port(xc->guest_port);
2124         }
2125         xc->guest_port = 0;
2126     }
2127 
2128     s->be_handles[xc->be_port] = NULL;
2129     xc->be_port = 0;
2130     ret = 0;
2131  out:
2132     qemu_mutex_unlock(&s->port_lock);
2133     return ret;
2134 }
2135 
2136 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2137 {
2138     if (!xc) {
2139         return -EFAULT;
2140     }
2141 
2142     xen_be_evtchn_unbind(xc, 0);
2143 
2144     close(xc->fd);
2145     free(xc);
2146     return 0;
2147 }
2148 
2149 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2150 {
2151     if (!xc) {
2152         return -1;
2153     }
2154     return xc->fd;
2155 }
2156 
2157 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2158 {
2159     XenEvtchnState *s = xen_evtchn_singleton;
2160     int ret;
2161 
2162     if (!s) {
2163         return -ENOTSUP;
2164     }
2165 
2166     if (!xc) {
2167         return -EFAULT;
2168     }
2169 
2170     qemu_mutex_lock(&s->port_lock);
2171 
2172     if (xc->guest_port) {
2173         set_port_pending(s, xc->guest_port);
2174         ret = 0;
2175     } else {
2176         ret = -ENOTCONN;
2177     }
2178 
2179     qemu_mutex_unlock(&s->port_lock);
2180 
2181     return ret;
2182 }
2183 
2184 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2185 {
2186     uint64_t val;
2187 
2188     if (!xc) {
2189         return -EFAULT;
2190     }
2191 
2192     if (!xc->be_port) {
2193         return 0;
2194     }
2195 
2196     if (eventfd_read(xc->fd, &val)) {
2197         return -errno;
2198     }
2199 
2200     return val ? xc->be_port : 0;
2201 }
2202 
2203 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2204 {
2205     if (!xc) {
2206         return -EFAULT;
2207     }
2208 
2209     if (xc->be_port != port) {
2210         return -EINVAL;
2211     }
2212 
2213     /*
2214      * We don't actually do anything to unmask it; the event was already
2215      * consumed in xen_be_evtchn_pending().
2216      */
2217     return 0;
2218 }
2219 
2220 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2221 {
2222     return xc->guest_port;
2223 }
2224 
2225 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2226 {
2227     XenEvtchnState *s = xen_evtchn_singleton;
2228     EvtchnInfoList *head = NULL, **tail = &head;
2229     void *shinfo, *pending, *mask;
2230     int i;
2231 
2232     if (!s) {
2233         error_setg(errp, "Xen event channel emulation not enabled");
2234         return NULL;
2235     }
2236 
2237     shinfo = xen_overlay_get_shinfo_ptr();
2238     if (!shinfo) {
2239         error_setg(errp, "Xen shared info page not allocated");
2240         return NULL;
2241     }
2242 
2243     if (xen_is_long_mode()) {
2244         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2245         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2246     } else {
2247         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2248         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2249     }
2250 
2251     QEMU_LOCK_GUARD(&s->port_lock);
2252 
2253     for (i = 0; i < s->nr_ports; i++) {
2254         XenEvtchnPort *p = &s->port_table[i];
2255         EvtchnInfo *info;
2256 
2257         if (p->type == EVTCHNSTAT_closed) {
2258             continue;
2259         }
2260 
2261         info = g_new0(EvtchnInfo, 1);
2262 
2263         info->port = i;
2264         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2265         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2266         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2267         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2268         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2269         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2270 
2271         info->type = p->type;
2272         if (p->type == EVTCHNSTAT_interdomain) {
2273             info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
2274                                            "qemu" : "loopback");
2275             info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
2276         } else {
2277             info->target = p->type_val;
2278         }
2279         info->vcpu = p->vcpu;
2280         info->pending = test_bit(i, pending);
2281         info->masked = test_bit(i, mask);
2282 
2283         QAPI_LIST_APPEND(tail, info);
2284     }
2285 
2286     return head;
2287 }
2288 
2289 void qmp_xen_event_inject(uint32_t port, Error **errp)
2290 {
2291     XenEvtchnState *s = xen_evtchn_singleton;
2292 
2293     if (!s) {
2294         error_setg(errp, "Xen event channel emulation not enabled");
2295         return;
2296     }
2297 
2298     if (!valid_port(port)) {
2299         error_setg(errp, "Invalid port %u", port);
2300     }
2301 
2302     QEMU_LOCK_GUARD(&s->port_lock);
2303 
2304     if (set_port_pending(s, port)) {
2305         error_setg(errp, "Failed to set port %u", port);
2306         return;
2307     }
2308 }
2309 
2310 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2311 {
2312     EvtchnInfoList *iter, *info_list;
2313     Error *err = NULL;
2314 
2315     info_list = qmp_xen_event_list(&err);
2316     if (err) {
2317         hmp_handle_error(mon, err);
2318         return;
2319     }
2320 
2321     for (iter = info_list; iter; iter = iter->next) {
2322         EvtchnInfo *info = iter->value;
2323 
2324         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2325                        EvtchnPortType_str(info->type));
2326         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2327             monitor_printf(mon,  "(");
2328             if (info->remote_domain) {
2329                 monitor_printf(mon, "%s:", info->remote_domain);
2330             }
2331             monitor_printf(mon, "%d)", info->target);
2332         }
2333         if (info->pending) {
2334             monitor_printf(mon, " PENDING");
2335         }
2336         if (info->masked) {
2337             monitor_printf(mon, " MASKED");
2338         }
2339         monitor_printf(mon, "\n");
2340     }
2341 
2342     qapi_free_EvtchnInfoList(info_list);
2343 }
2344 
2345 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2346 {
2347     int port = qdict_get_int(qdict, "port");
2348     Error *err = NULL;
2349 
2350     qmp_xen_event_inject(port, &err);
2351     if (err) {
2352         hmp_handle_error(mon, err);
2353     } else {
2354         monitor_printf(mon, "Delivered port %d\n", port);
2355     }
2356 }
2357 
2358