1 /*
2 * QEMU Xen emulation: Event channel support
3 *
4 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 *
6 * Authors: David Woodhouse <dwmw2@infradead.org>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-target.h"
23 #include "qapi/qmp/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "exec/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43
44 #include "sysemu/kvm.h"
45 #include "sysemu/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57
58 typedef struct XenEvtchnPort {
59 uint32_t vcpu; /* Xen/ACPI vcpu_id */
60 uint16_t type; /* EVTCHNSTAT_xxxx */
61 union {
62 uint16_t val; /* raw value for serialization etc. */
63 uint16_t pirq;
64 uint16_t virq;
65 struct {
66 uint16_t port:15;
67 uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
68 } interdomain;
69 } u;
70 } XenEvtchnPort;
71
72 /* 32-bit compatibility definitions, also used natively in 32-bit build */
73 struct compat_arch_vcpu_info {
74 unsigned int cr2;
75 unsigned int pad[5];
76 };
77
78 struct compat_vcpu_info {
79 uint8_t evtchn_upcall_pending;
80 uint8_t evtchn_upcall_mask;
81 uint16_t pad;
82 uint32_t evtchn_pending_sel;
83 struct compat_arch_vcpu_info arch;
84 struct vcpu_time_info time;
85 }; /* 64 bytes (x86) */
86
87 struct compat_arch_shared_info {
88 unsigned int max_pfn;
89 unsigned int pfn_to_mfn_frame_list_list;
90 unsigned int nmi_reason;
91 unsigned int p2m_cr3;
92 unsigned int p2m_vaddr;
93 unsigned int p2m_generation;
94 uint32_t wc_sec_hi;
95 };
96
97 struct compat_shared_info {
98 struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
99 uint32_t evtchn_pending[32];
100 uint32_t evtchn_mask[32];
101 uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
102 uint32_t wc_sec;
103 uint32_t wc_nsec;
104 struct compat_arch_shared_info arch;
105 };
106
107 #define COMPAT_EVTCHN_2L_NR_CHANNELS 1024
108
109 /* Local private implementation of struct xenevtchn_handle */
110 struct xenevtchn_handle {
111 evtchn_port_t be_port;
112 evtchn_port_t guest_port; /* Or zero for unbound */
113 int fd;
114 };
115
116 /*
117 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
118 * insane enough to think about guest-transparent live migration from actual
119 * Xen to QEMU, and ensuring that we can convert/consume the stream.
120 */
121 #define IRQ_UNBOUND -1
122 #define IRQ_PT -2
123 #define IRQ_MSI_EMU -3
124
125
126 struct pirq_info {
127 int gsi;
128 uint16_t port;
129 PCIDevice *dev;
130 int vector;
131 bool is_msix;
132 bool is_masked;
133 bool is_translated;
134 };
135
136 struct XenEvtchnState {
137 /*< private >*/
138 SysBusDevice busdev;
139 /*< public >*/
140
141 uint64_t callback_param;
142 bool evtchn_in_kernel;
143 uint32_t callback_gsi;
144
145 QEMUBH *gsi_bh;
146
147 QemuMutex port_lock;
148 uint32_t nr_ports;
149 XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
150
151 /* Connected to the system GSIs for raising callback as GSI / INTx */
152 unsigned int nr_callback_gsis;
153 qemu_irq *callback_gsis;
154
155 struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
156
157 uint32_t nr_pirqs;
158
159 /* Bitmap of allocated PIRQs (serialized) */
160 uint16_t nr_pirq_inuse_words;
161 uint64_t *pirq_inuse_bitmap;
162
163 /* GSI → PIRQ mapping (serialized) */
164 uint16_t gsi_pirq[IOAPIC_NUM_PINS];
165
166 /* Per-GSI assertion state (serialized) */
167 uint32_t pirq_gsi_set;
168
169 /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
170 struct pirq_info *pirq;
171 };
172
173 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
174 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
175
176 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
177
178 struct XenEvtchnState *xen_evtchn_singleton;
179
180 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
181 #define CALLBACK_VIA_TYPE_SHIFT 56
182
183 static void unbind_backend_ports(XenEvtchnState *s);
184
185 static int xen_evtchn_pre_load(void *opaque)
186 {
187 XenEvtchnState *s = opaque;
188
189 /* Unbind all the backend-side ports; they need to rebind */
190 unbind_backend_ports(s);
191
192 /* It'll be leaked otherwise. */
193 g_free(s->pirq_inuse_bitmap);
194 s->pirq_inuse_bitmap = NULL;
195
196 return 0;
197 }
198
199 static int xen_evtchn_post_load(void *opaque, int version_id)
200 {
201 XenEvtchnState *s = opaque;
202 uint32_t i;
203
204 if (s->callback_param) {
205 xen_evtchn_set_callback_param(s->callback_param);
206 }
207
208 /* Rebuild s->pirq[].port mapping */
209 for (i = 0; i < s->nr_ports; i++) {
210 XenEvtchnPort *p = &s->port_table[i];
211
212 if (p->type == EVTCHNSTAT_pirq) {
213 assert(p->u.pirq);
214 assert(p->u.pirq < s->nr_pirqs);
215
216 /*
217 * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
218 * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
219 * catches up with it.
220 */
221 s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
222 s->pirq[p->u.pirq].port = i;
223 }
224 }
225 /* Rebuild s->pirq[].gsi mapping */
226 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
227 if (s->gsi_pirq[i]) {
228 s->pirq[s->gsi_pirq[i]].gsi = i;
229 }
230 }
231 return 0;
232 }
233
234 static bool xen_evtchn_is_needed(void *opaque)
235 {
236 return xen_mode == XEN_EMULATE;
237 }
238
239 static const VMStateDescription xen_evtchn_port_vmstate = {
240 .name = "xen_evtchn_port",
241 .version_id = 1,
242 .minimum_version_id = 1,
243 .fields = (const VMStateField[]) {
244 VMSTATE_UINT32(vcpu, XenEvtchnPort),
245 VMSTATE_UINT16(type, XenEvtchnPort),
246 VMSTATE_UINT16(u.val, XenEvtchnPort),
247 VMSTATE_END_OF_LIST()
248 }
249 };
250
251 static const VMStateDescription xen_evtchn_vmstate = {
252 .name = "xen_evtchn",
253 .version_id = 1,
254 .minimum_version_id = 1,
255 .needed = xen_evtchn_is_needed,
256 .pre_load = xen_evtchn_pre_load,
257 .post_load = xen_evtchn_post_load,
258 .fields = (const VMStateField[]) {
259 VMSTATE_UINT64(callback_param, XenEvtchnState),
260 VMSTATE_UINT32(nr_ports, XenEvtchnState),
261 VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
262 xen_evtchn_port_vmstate, XenEvtchnPort),
263 VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
264 VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
265 nr_pirq_inuse_words, 0,
266 vmstate_info_uint64, uint64_t),
267 VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
268 VMSTATE_END_OF_LIST()
269 }
270 };
271
272 static void xen_evtchn_class_init(ObjectClass *klass, void *data)
273 {
274 DeviceClass *dc = DEVICE_CLASS(klass);
275
276 dc->vmsd = &xen_evtchn_vmstate;
277 }
278
279 static const TypeInfo xen_evtchn_info = {
280 .name = TYPE_XEN_EVTCHN,
281 .parent = TYPE_SYS_BUS_DEVICE,
282 .instance_size = sizeof(XenEvtchnState),
283 .class_init = xen_evtchn_class_init,
284 };
285
286 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
287 .open = xen_be_evtchn_open,
288 .bind_interdomain = xen_be_evtchn_bind_interdomain,
289 .unbind = xen_be_evtchn_unbind,
290 .close = xen_be_evtchn_close,
291 .get_fd = xen_be_evtchn_fd,
292 .notify = xen_be_evtchn_notify,
293 .unmask = xen_be_evtchn_unmask,
294 .pending = xen_be_evtchn_pending,
295 };
296
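/*
 * Bottom half scheduled from xen_evtchn_set_callback_level() when the BQL
 * is not held: it re-reads vCPU0's evtchn_upcall_pending flag and raises
 * or lowers the callback GSI accordingly.
 */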
297 static void gsi_assert_bh(void *opaque)
298 {
299 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
300 if (vi) {
301 xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
302 }
303 }
304
305 void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
306 {
307 XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
308 -1, NULL));
309 int i;
310
311 xen_evtchn_singleton = s;
312
313 qemu_mutex_init(&s->port_lock);
314 s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
315
316 /*
317 * These are the *output* GSI from event channel support, for
318 * signalling CPU0's events via GSI or PCI INTx instead of the
319 * per-CPU vector. We create a *set* of irqs and connect one to
320 * each of the system GSIs which were passed in from the platform
321 * code, and then just trigger the right one as appropriate from
322 * xen_evtchn_set_callback_level().
323 */
324 s->nr_callback_gsis = nr_gsis;
325 s->callback_gsis = g_new0(qemu_irq, nr_gsis);
326 for (i = 0; i < nr_gsis; i++) {
327 sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
328 sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
329 }
330
331 /*
332 * The Xen scheme for encoding PIRQ# into an MSI message is not
333 * compatible with 32-bit MSI, as it puts the high bits of the
334 * PIRQ# into the high bits of the MSI message address, instead of
335 * using the Extended Destination ID in address bits 4-11 which
336 * perhaps would have been a better choice.
337 *
338 * To keep life simple, kvm_accel_instance_init() initialises the
339 * default to 256, which conveniently doesn't need to set anything
340 * outside the low 32 bits of the address. It can be increased by
341 * setting the xen-evtchn-max-pirq property.
342 */
343 s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
344
345 s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
346 s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
347 s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
348
349 /* Set event channel functions for backend drivers to use */
350 xen_evtchn_ops = &emu_evtchn_backend_ops;
351 }
352
353 static void xen_evtchn_register_types(void)
354 {
355 type_register_static(&xen_evtchn_info);
356 }
357
358 type_init(xen_evtchn_register_types)
359
360 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
361 {
362 PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
363 uint8_t pin = param & 3;
364 uint8_t devfn = (param >> 8) & 0xff;
365 uint16_t bus = (param >> 16) & 0xffff;
366 uint16_t domain = (param >> 32) & 0xffff;
367 PCIDevice *pdev;
368 PCIINTxRoute r;
369
370 if (domain || !pcms) {
371 return 0;
372 }
373
374 pdev = pci_find_device(pcms->pcibus, bus, devfn);
375 if (!pdev) {
376 return 0;
377 }
378
379 r = pci_device_route_intx_to_irq(pdev, pin);
380 if (r.mode != PCI_INTX_ENABLED) {
381 return 0;
382 }
383
384 /*
385 * Hm, can we be notified of INTX routing changes? Not without
386 * *owning* the device and being allowed to overwrite its own
387 * ->intx_routing_notifier, AFAICT. So let's not.
388 */
389 return r.irq;
390 }
391
392 void xen_evtchn_set_callback_level(int level)
393 {
394 XenEvtchnState *s = xen_evtchn_singleton;
395 if (!s) {
396 return;
397 }
398
399 /*
400 * We get to this function in a number of ways:
401 *
402 * • From I/O context, via PV backend drivers sending a notification to
403 * the guest.
404 *
405 * • From guest vCPU context, via loopback interdomain event channels
406 * (or theoretically even IPIs but guests don't use those with GSI
407 * delivery because that's pointless. We don't want a malicious guest
408 * to be able to trigger a deadlock though, so we can't rule it out.)
409 *
410 * • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
411 * configured.
412 *
413 * • From guest vCPU context in the KVM exit handler, if the upcall
414 * pending flag has been cleared and the GSI needs to be deasserted.
415 *
416 * • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
417 * been acked in the irqchip.
418 *
419 * Whichever context we come from, if we aren't already holding the BQL
420 * then we can't take it now, as we may already hold s->port_lock. So
421 * trigger the BH to set the IRQ for us instead of doing it immediately.
422 *
423 * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
424 * will deliberately take the BQL because they want the change to take
425 * effect immediately. That just leaves interdomain loopback as the case
426 * which uses the BH.
427 */
428 if (!bql_locked()) {
429 qemu_bh_schedule(s->gsi_bh);
430 return;
431 }
432
433 if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
434 qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
435 if (level) {
436 /* Ensure the vCPU polls for deassertion */
437 kvm_xen_set_callback_asserted();
438 }
439 }
440 }
441
442 int xen_evtchn_set_callback_param(uint64_t param)
443 {
444 XenEvtchnState *s = xen_evtchn_singleton;
445 struct kvm_xen_hvm_attr xa = {
446 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
447 .u.vector = 0,
448 };
449 bool in_kernel = false;
450 uint32_t gsi = 0;
451 int type = param >> CALLBACK_VIA_TYPE_SHIFT;
452 int ret;
453
454 if (!s) {
455 return -ENOTSUP;
456 }
457
458 /*
459 * We need the BQL because set_callback_pci_intx() may call into PCI code,
460 * and because we may need to manipulate the old and new GSI levels.
461 */
462 assert(bql_locked());
463 qemu_mutex_lock(&s->port_lock);
464
465 switch (type) {
466 case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
467 xa.u.vector = (uint8_t)param;
468
469 ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
470 if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
471 in_kernel = true;
472 }
473 gsi = 0;
474 break;
475 }
476
477 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
478 gsi = set_callback_pci_intx(s, param);
479 ret = gsi ? 0 : -EINVAL;
480 break;
481
482 case HVM_PARAM_CALLBACK_TYPE_GSI:
483 gsi = (uint32_t)param;
484 ret = 0;
485 break;
486
487 default:
488 /* Xen doesn't return an error even if you set something bogus */
489 ret = 0;
490 break;
491 }
492
493 /* If the guest has set a per-vCPU callback vector, prefer that. */
494 if (gsi && kvm_xen_has_vcpu_callback_vector()) {
495 in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
496 gsi = 0;
497 }
498
499 if (!ret) {
500 /* If vector delivery was turned *off* then tell the kernel */
501 if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
502 HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
503 kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
504 }
505 s->callback_param = param;
506 s->evtchn_in_kernel = in_kernel;
507
508 if (gsi != s->callback_gsi) {
509 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
510
511 xen_evtchn_set_callback_level(0);
512 s->callback_gsi = gsi;
513
514 if (gsi && vi && vi->evtchn_upcall_pending) {
515 kvm_xen_inject_vcpu_callback_vector(0, type);
516 }
517 }
518 }
519
520 qemu_mutex_unlock(&s->port_lock);
521
522 return ret;
523 }
524
525 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
526 {
527 int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
528
529 kvm_xen_inject_vcpu_callback_vector(vcpu, type);
530 }
531
532 static void deassign_kernel_port(evtchn_port_t port)
533 {
534 struct kvm_xen_hvm_attr ha;
535 int ret;
536
537 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
538 ha.u.evtchn.send_port = port;
539 ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
540
541 ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
542 if (ret) {
543 qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
544 port, strerror(-ret));
545 }
546 }
547
548 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
549 uint32_t vcpu_id)
550 {
551 CPUState *cpu = qemu_get_cpu(vcpu_id);
552 struct kvm_xen_hvm_attr ha;
553
554 if (!cpu) {
555 return -ENOENT;
556 }
557
558 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
559 ha.u.evtchn.send_port = port;
560 ha.u.evtchn.type = type;
561 ha.u.evtchn.flags = 0;
562 ha.u.evtchn.deliver.port.port = port;
563 ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
564 ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
565
566 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
567 }
568
569 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
570 {
571 struct kvm_xen_hvm_attr ha;
572
573 ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
574 ha.u.evtchn.send_port = port;
575 ha.u.evtchn.type = type;
576 ha.u.evtchn.flags = 0;
577 ha.u.evtchn.deliver.eventfd.port = 0;
578 ha.u.evtchn.deliver.eventfd.fd = fd;
579
580 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
581 }
582
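/*
 * Port 0 is never valid; the upper bound depends on whether the guest is
 * running in long mode, since the 2-level ABI exposes fewer channels to
 * 32-bit guests.
 */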
583 static bool valid_port(evtchn_port_t port)
584 {
585 if (!port) {
586 return false;
587 }
588
589 if (xen_is_long_mode()) {
590 return port < EVTCHN_2L_NR_CHANNELS;
591 } else {
592 return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
593 }
594 }
595
596 static bool valid_vcpu(uint32_t vcpu)
597 {
598 return !!qemu_get_cpu(vcpu);
599 }
600
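/*
 * Called from the pre_load handler: detach every guest port bound to a
 * QEMU-side backend handle so the backends can rebind after the incoming
 * migration stream has been processed.
 */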
601 static void unbind_backend_ports(XenEvtchnState *s)
602 {
603 XenEvtchnPort *p;
604 int i;
605
606 for (i = 1; i < s->nr_ports; i++) {
607 p = &s->port_table[i];
608 if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
609 evtchn_port_t be_port = p->u.interdomain.port;
610
611 if (s->be_handles[be_port]) {
612 /* This part will be overwritten on the load anyway. */
613 p->type = EVTCHNSTAT_unbound;
614 p->u.interdomain.port = 0;
615
616 /* Leave the backend port open and unbound too. */
617 if (kvm_xen_has_cap(EVTCHN_SEND)) {
618 deassign_kernel_port(i);
619 }
620 s->be_handles[be_port]->guest_port = 0;
621 }
622 }
623 }
624 }
625
626 int xen_evtchn_status_op(struct evtchn_status *status)
627 {
628 XenEvtchnState *s = xen_evtchn_singleton;
629 XenEvtchnPort *p;
630
631 if (!s) {
632 return -ENOTSUP;
633 }
634
635 if (status->dom != DOMID_SELF && status->dom != xen_domid) {
636 return -ESRCH;
637 }
638
639 if (!valid_port(status->port)) {
640 return -EINVAL;
641 }
642
643 qemu_mutex_lock(&s->port_lock);
644
645 p = &s->port_table[status->port];
646
647 status->status = p->type;
648 status->vcpu = p->vcpu;
649
650 switch (p->type) {
651 case EVTCHNSTAT_unbound:
652 status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
653 : xen_domid;
654 break;
655
656 case EVTCHNSTAT_interdomain:
657 status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
658 : xen_domid;
659 status->u.interdomain.port = p->u.interdomain.port;
660 break;
661
662 case EVTCHNSTAT_pirq:
663 status->u.pirq = p->u.pirq;
664 break;
665
666 case EVTCHNSTAT_virq:
667 status->u.virq = p->u.virq;
668 break;
669 }
670
671 qemu_mutex_unlock(&s->port_lock);
672 return 0;
673 }
674
675 /*
676 * Never thought I'd hear myself say this, but C++ templates would be
677 * kind of nice here.
678 *
679 * template<class T> static int do_unmask_port(T *shinfo, ...);
680 */
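/*
 * do_unmask_port_lm() and do_unmask_port_compat() walk the same 2-level
 * chain: the pending bit in shared_info, then the vCPU's
 * evtchn_pending_sel word, then evtchn_upcall_pending, and only inject
 * the callback when each step was newly set and the port is not masked.
 */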
681 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
682 bool do_unmask, struct shared_info *shinfo,
683 struct vcpu_info *vcpu_info)
684 {
685 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
686 typeof(shinfo->evtchn_pending[0]) mask;
687 int idx = port / bits_per_word;
688 int offset = port % bits_per_word;
689
690 mask = 1UL << offset;
691
692 if (idx >= bits_per_word) {
693 return -EINVAL;
694 }
695
696 if (do_unmask) {
697 /*
698 * If this is a true unmask operation, clear the mask bit. If
699 * it was already unmasked, we have nothing further to do.
700 */
701 if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
702 return 0;
703 }
704 } else {
705 /*
706 * This is a pseudo-unmask for affinity changes. We don't
707 * change the mask bit, and if it's *masked* we have nothing
708 * else to do.
709 */
710 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
711 return 0;
712 }
713 }
714
715 /* If the event was not pending, we're done. */
716 if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
717 return 0;
718 }
719
720 /* Now on to the vcpu_info evtchn_pending_sel index... */
721 mask = 1UL << idx;
722
723 /* If a port in this word was already pending for this vCPU, all done. */
724 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
725 return 0;
726 }
727
728 /* Set evtchn_upcall_pending for this vCPU */
729 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
730 return 0;
731 }
732
733 inject_callback(s, s->port_table[port].vcpu);
734
735 return 0;
736 }
737
738 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
739 bool do_unmask,
740 struct compat_shared_info *shinfo,
741 struct compat_vcpu_info *vcpu_info)
742 {
743 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
744 typeof(shinfo->evtchn_pending[0]) mask;
745 int idx = port / bits_per_word;
746 int offset = port % bits_per_word;
747
748 mask = 1UL << offset;
749
750 if (idx >= bits_per_word) {
751 return -EINVAL;
752 }
753
754 if (do_unmask) {
755 /*
756 * If this is a true unmask operation, clear the mask bit. If
757 * it was already unmasked, we have nothing further to do.
758 */
759 if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
760 return 0;
761 }
762 } else {
763 /*
764 * This is a pseudo-unmask for affinity changes. We don't
765 * change the mask bit, and if it's *masked* we have nothing
766 * else to do.
767 */
768 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
769 return 0;
770 }
771 }
772
773 /* If the event was not pending, we're done. */
774 if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
775 return 0;
776 }
777
778 /* Now on to the vcpu_info evtchn_pending_sel index... */
779 mask = 1UL << idx;
780
781 /* If a port in this word was already pending for this vCPU, all done. */
782 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
783 return 0;
784 }
785
786 /* Set evtchn_upcall_pending for this vCPU */
787 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
788 return 0;
789 }
790
791 inject_callback(s, s->port_table[port].vcpu);
792
793 return 0;
794 }
795
796 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
797 {
798 void *vcpu_info, *shinfo;
799
800 if (s->port_table[port].type == EVTCHNSTAT_closed) {
801 return -EINVAL;
802 }
803
804 shinfo = xen_overlay_get_shinfo_ptr();
805 if (!shinfo) {
806 return -ENOTSUP;
807 }
808
809 vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
810 if (!vcpu_info) {
811 return -EINVAL;
812 }
813
814 if (xen_is_long_mode()) {
815 return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
816 } else {
817 return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
818 }
819 }
820
821 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
822 struct shared_info *shinfo,
823 struct vcpu_info *vcpu_info)
824 {
825 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
826 typeof(shinfo->evtchn_pending[0]) mask;
827 int idx = port / bits_per_word;
828 int offset = port % bits_per_word;
829
830 mask = 1UL << offset;
831
832 if (idx >= bits_per_word) {
833 return -EINVAL;
834 }
835
836 /* Update the pending bit itself. If it was already set, we're done. */
837 if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
838 return 0;
839 }
840
841 /* Check if it's masked. */
842 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
843 return 0;
844 }
845
846 /* Now on to the vcpu_info evtchn_pending_sel index... */
847 mask = 1UL << idx;
848
849 /* If a port in this word was already pending for this vCPU, all done. */
850 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
851 return 0;
852 }
853
854 /* Set evtchn_upcall_pending for this vCPU */
855 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
856 return 0;
857 }
858
859 inject_callback(s, s->port_table[port].vcpu);
860
861 return 0;
862 }
863
864 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
865 struct compat_shared_info *shinfo,
866 struct compat_vcpu_info *vcpu_info)
867 {
868 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
869 typeof(shinfo->evtchn_pending[0]) mask;
870 int idx = port / bits_per_word;
871 int offset = port % bits_per_word;
872
873 mask = 1UL << offset;
874
875 if (idx >= bits_per_word) {
876 return -EINVAL;
877 }
878
879 /* Update the pending bit itself. If it was already set, we're done. */
880 if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
881 return 0;
882 }
883
884 /* Check if it's masked. */
885 if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
886 return 0;
887 }
888
889 /* Now on to the vcpu_info evtchn_pending_sel index... */
890 mask = 1UL << idx;
891
892 /* If a port in this word was already pending for this vCPU, all done. */
893 if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
894 return 0;
895 }
896
897 /* Set evtchn_upcall_pending for this vCPU */
898 if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
899 return 0;
900 }
901
902 inject_callback(s, s->port_table[port].vcpu);
903
904 return 0;
905 }
906
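/*
 * Deliver an event on @port. If the kernel is accelerating event channel
 * delivery, hand it off via KVM_XEN_HVM_EVTCHN_SEND; otherwise update the
 * shared_info / vcpu_info bitmaps directly in userspace.
 */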
907 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
908 {
909 void *vcpu_info, *shinfo;
910
911 if (s->port_table[port].type == EVTCHNSTAT_closed) {
912 return -EINVAL;
913 }
914
915 if (s->evtchn_in_kernel) {
916 XenEvtchnPort *p = &s->port_table[port];
917 CPUState *cpu = qemu_get_cpu(p->vcpu);
918 struct kvm_irq_routing_xen_evtchn evt;
919
920 if (!cpu) {
921 return 0;
922 }
923
924 evt.port = port;
925 evt.vcpu = kvm_arch_vcpu_id(cpu);
926 evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
927
928 return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
929 }
930
931 shinfo = xen_overlay_get_shinfo_ptr();
932 if (!shinfo) {
933 return -ENOTSUP;
934 }
935
936 vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
937 if (!vcpu_info) {
938 return -EINVAL;
939 }
940
941 if (xen_is_long_mode()) {
942 return do_set_port_lm(s, port, shinfo, vcpu_info);
943 } else {
944 return do_set_port_compat(s, port, shinfo, vcpu_info);
945 }
946 }
947
948 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
949 {
950 void *p = xen_overlay_get_shinfo_ptr();
951
952 if (!p) {
953 return -ENOTSUP;
954 }
955
956 if (xen_is_long_mode()) {
957 struct shared_info *shinfo = p;
958 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
959 typeof(shinfo->evtchn_pending[0]) mask;
960 int idx = port / bits_per_word;
961 int offset = port % bits_per_word;
962
963 mask = 1UL << offset;
964
965 qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
966 } else {
967 struct compat_shared_info *shinfo = p;
968 const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
969 typeof(shinfo->evtchn_pending[0]) mask;
970 int idx = port / bits_per_word;
971 int offset = port % bits_per_word;
972
973 mask = 1UL << offset;
974
975 qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
976 }
977 return 0;
978 }
979
980 static void free_port(XenEvtchnState *s, evtchn_port_t port)
981 {
982 s->port_table[port].type = EVTCHNSTAT_closed;
983 s->port_table[port].u.val = 0;
984 s->port_table[port].vcpu = 0;
985
986 if (s->nr_ports == port + 1) {
987 do {
988 s->nr_ports--;
989 } while (s->nr_ports &&
990 s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
991 }
992
993 /* Clear pending event to avoid unexpected behavior on re-bind. */
994 clear_port_pending(s, port);
995 }
996
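/*
 * Find the lowest free port (starting from 1), claim it for the given
 * vCPU/type, and keep nr_ports as a high-water mark for the port table.
 */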
997 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
998 uint16_t val, evtchn_port_t *port)
999 {
1000 evtchn_port_t p = 1;
1001
1002 for (p = 1; valid_port(p); p++) {
1003 if (s->port_table[p].type == EVTCHNSTAT_closed) {
1004 s->port_table[p].vcpu = vcpu;
1005 s->port_table[p].type = type;
1006 s->port_table[p].u.val = val;
1007
1008 *port = p;
1009
1010 if (s->nr_ports < p + 1) {
1011 s->nr_ports = p + 1;
1012 }
1013
1014 return 0;
1015 }
1016 }
1017 return -ENOSPC;
1018 }
1019
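/* These four VIRQs are per-vCPU; everything else is a global VIRQ. */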
1020 static bool virq_is_global(uint32_t virq)
1021 {
1022 switch (virq) {
1023 case VIRQ_TIMER:
1024 case VIRQ_DEBUG:
1025 case VIRQ_XENOPROF:
1026 case VIRQ_XENPMU:
1027 return false;
1028
1029 default:
1030 return true;
1031 }
1032 }
1033
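/*
 * Tear down a single port, undoing any type-specific state (PIRQ binding,
 * kernel VIRQ/IPI assignment, backend handle or loopback peer) before
 * marking it closed.
 */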
1034 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1035 bool *flush_kvm_routes)
1036 {
1037 XenEvtchnPort *p = &s->port_table[port];
1038
1039 /* Because it *might* be a PIRQ port */
1040 assert(bql_locked());
1041
1042 switch (p->type) {
1043 case EVTCHNSTAT_closed:
1044 return -ENOENT;
1045
1046 case EVTCHNSTAT_pirq:
1047 s->pirq[p->u.pirq].port = 0;
1048 if (s->pirq[p->u.pirq].is_translated) {
1049 *flush_kvm_routes = true;
1050 }
1051 break;
1052
1053 case EVTCHNSTAT_virq:
1054 kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
1055 p->u.virq, 0);
1056 break;
1057
1058 case EVTCHNSTAT_ipi:
1059 if (s->evtchn_in_kernel) {
1060 deassign_kernel_port(port);
1061 }
1062 break;
1063
1064 case EVTCHNSTAT_interdomain:
1065 if (p->u.interdomain.to_qemu) {
1066 uint16_t be_port = p->u.interdomain.port;
1067 struct xenevtchn_handle *xc = s->be_handles[be_port];
1068 if (xc) {
1069 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1070 deassign_kernel_port(port);
1071 }
1072 xc->guest_port = 0;
1073 }
1074 } else {
1075 /* Loopback interdomain */
1076 XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
1077 if (!valid_port(p->u.interdomain.port) ||
1078 rp->u.interdomain.port != port ||
1079 rp->type != EVTCHNSTAT_interdomain) {
1080 error_report("Inconsistent state for interdomain unbind");
1081 } else {
1082 /* Set the other end back to unbound */
1083 rp->type = EVTCHNSTAT_unbound;
1084 rp->u.interdomain.port = 0;
1085 }
1086 }
1087 break;
1088
1089 default:
1090 break;
1091 }
1092
1093 free_port(s, port);
1094 return 0;
1095 }
1096
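/*
 * Close every open port. Requires the BQL (asserted below), and flushes
 * the KVM MSI routes afterwards if any translated PIRQ port was closed.
 */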
1097 int xen_evtchn_soft_reset(void)
1098 {
1099 XenEvtchnState *s = xen_evtchn_singleton;
1100 bool flush_kvm_routes = false;
1101 int i;
1102
1103 if (!s) {
1104 return -ENOTSUP;
1105 }
1106
1107 assert(bql_locked());
1108
1109 qemu_mutex_lock(&s->port_lock);
1110
1111 for (i = 0; i < s->nr_ports; i++) {
1112 close_port(s, i, &flush_kvm_routes);
1113 }
1114
1115 qemu_mutex_unlock(&s->port_lock);
1116
1117 if (flush_kvm_routes) {
1118 kvm_update_msi_routes_all(NULL, true, 0, 0);
1119 }
1120
1121 return 0;
1122 }
1123
1124 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1125 {
1126 if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1127 return -ESRCH;
1128 }
1129
1130 BQL_LOCK_GUARD();
1131 return xen_evtchn_soft_reset();
1132 }
1133
1134 int xen_evtchn_close_op(struct evtchn_close *close)
1135 {
1136 XenEvtchnState *s = xen_evtchn_singleton;
1137 bool flush_kvm_routes = false;
1138 int ret;
1139
1140 if (!s) {
1141 return -ENOTSUP;
1142 }
1143
1144 if (!valid_port(close->port)) {
1145 return -EINVAL;
1146 }
1147
1148 BQL_LOCK_GUARD();
1149 qemu_mutex_lock(&s->port_lock);
1150
1151 ret = close_port(s, close->port, &flush_kvm_routes);
1152
1153 qemu_mutex_unlock(&s->port_lock);
1154
1155 if (flush_kvm_routes) {
1156 kvm_update_msi_routes_all(NULL, true, 0, 0);
1157 }
1158
1159 return ret;
1160 }
1161
1162 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1163 {
1164 XenEvtchnState *s = xen_evtchn_singleton;
1165 int ret;
1166
1167 if (!s) {
1168 return -ENOTSUP;
1169 }
1170
1171 if (!valid_port(unmask->port)) {
1172 return -EINVAL;
1173 }
1174
1175 qemu_mutex_lock(&s->port_lock);
1176
1177 ret = unmask_port(s, unmask->port, true);
1178
1179 qemu_mutex_unlock(&s->port_lock);
1180
1181 return ret;
1182 }
1183
1184 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1185 {
1186 XenEvtchnState *s = xen_evtchn_singleton;
1187 XenEvtchnPort *p;
1188 int ret = -EINVAL;
1189
1190 if (!s) {
1191 return -ENOTSUP;
1192 }
1193
1194 if (!valid_port(vcpu->port)) {
1195 return -EINVAL;
1196 }
1197
1198 if (!valid_vcpu(vcpu->vcpu)) {
1199 return -ENOENT;
1200 }
1201
1202 qemu_mutex_lock(&s->port_lock);
1203
1204 p = &s->port_table[vcpu->port];
1205
1206 if (p->type == EVTCHNSTAT_interdomain ||
1207 p->type == EVTCHNSTAT_unbound ||
1208 p->type == EVTCHNSTAT_pirq ||
1209 (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
1210 /*
1211 * unmask_port() with do_unmask==false will just raise the event
1212 * on the new vCPU if the port was already pending.
1213 */
1214 p->vcpu = vcpu->vcpu;
1215 unmask_port(s, vcpu->port, false);
1216 ret = 0;
1217 }
1218
1219 qemu_mutex_unlock(&s->port_lock);
1220
1221 return ret;
1222 }
1223
1224 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1225 {
1226 XenEvtchnState *s = xen_evtchn_singleton;
1227 int ret;
1228
1229 if (!s) {
1230 return -ENOTSUP;
1231 }
1232
1233 if (virq->virq >= NR_VIRQS) {
1234 return -EINVAL;
1235 }
1236
1237 /* Global VIRQ must be allocated on vCPU0 first */
1238 if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1239 return -EINVAL;
1240 }
1241
1242 if (!valid_vcpu(virq->vcpu)) {
1243 return -ENOENT;
1244 }
1245
1246 qemu_mutex_lock(&s->port_lock);
1247
1248 ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1249 &virq->port);
1250 if (!ret) {
1251 ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1252 if (ret) {
1253 free_port(s, virq->port);
1254 }
1255 }
1256
1257 qemu_mutex_unlock(&s->port_lock);
1258
1259 return ret;
1260 }
1261
1262 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1263 {
1264 XenEvtchnState *s = xen_evtchn_singleton;
1265 int ret;
1266
1267 if (!s) {
1268 return -ENOTSUP;
1269 }
1270
1271 if (pirq->pirq >= s->nr_pirqs) {
1272 return -EINVAL;
1273 }
1274
1275 BQL_LOCK_GUARD();
1276
1277 if (s->pirq[pirq->pirq].port) {
1278 return -EBUSY;
1279 }
1280
1281 qemu_mutex_lock(&s->port_lock);
1282
1283 ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1284 &pirq->port);
1285 if (ret) {
1286 qemu_mutex_unlock(&s->port_lock);
1287 return ret;
1288 }
1289
1290 s->pirq[pirq->pirq].port = pirq->port;
1291 trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1292
1293 qemu_mutex_unlock(&s->port_lock);
1294
1295 /*
1296 * Need to do the unmask outside port_lock because it may call
1297 * back into the MSI translate function.
1298 */
1299 if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1300 if (s->pirq[pirq->pirq].is_masked) {
1301 PCIDevice *dev = s->pirq[pirq->pirq].dev;
1302 int vector = s->pirq[pirq->pirq].vector;
1303 char *dev_path = qdev_get_dev_path(DEVICE(dev));
1304
1305 trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1306 g_free(dev_path);
1307
1308 if (s->pirq[pirq->pirq].is_msix) {
1309 msix_set_mask(dev, vector, false);
1310 } else {
1311 msi_set_mask(dev, vector, false, NULL);
1312 }
1313 } else if (s->pirq[pirq->pirq].is_translated) {
1314 /*
1315 * If KVM had attempted to translate this one before, make it try
1316 * again. If we unmasked, then the notifier on the MSI(-X) vector
1317 * will already have had the same effect.
1318 */
1319 kvm_update_msi_routes_all(NULL, true, 0, 0);
1320 }
1321 }
1322
1323 return ret;
1324 }
1325
1326 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1327 {
1328 XenEvtchnState *s = xen_evtchn_singleton;
1329 int ret;
1330
1331 if (!s) {
1332 return -ENOTSUP;
1333 }
1334
1335 if (!valid_vcpu(ipi->vcpu)) {
1336 return -ENOENT;
1337 }
1338
1339 qemu_mutex_lock(&s->port_lock);
1340
1341 ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1342 if (!ret && s->evtchn_in_kernel) {
1343 assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1344 }
1345
1346 qemu_mutex_unlock(&s->port_lock);
1347
1348 return ret;
1349 }
1350
1351 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1352 {
1353 XenEvtchnState *s = xen_evtchn_singleton;
1354 int ret;
1355
1356 if (!s) {
1357 return -ENOTSUP;
1358 }
1359
1360 if (interdomain->remote_dom != DOMID_QEMU &&
1361 interdomain->remote_dom != DOMID_SELF &&
1362 interdomain->remote_dom != xen_domid) {
1363 return -ESRCH;
1364 }
1365
1366 if (!valid_port(interdomain->remote_port)) {
1367 return -EINVAL;
1368 }
1369
1370 qemu_mutex_lock(&s->port_lock);
1371
1372 /* The newly allocated port starts out as unbound */
1373 ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
1374
1375 if (ret) {
1376 goto out;
1377 }
1378
1379 if (interdomain->remote_dom == DOMID_QEMU) {
1380 struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1381 XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1382
1383 if (!xc) {
1384 ret = -ENOENT;
1385 goto out_free_port;
1386 }
1387
1388 if (xc->guest_port) {
1389 ret = -EBUSY;
1390 goto out_free_port;
1391 }
1392
1393 assert(xc->be_port == interdomain->remote_port);
1394 xc->guest_port = interdomain->local_port;
1395 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1396 assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1397 }
1398 lp->type = EVTCHNSTAT_interdomain;
1399 lp->u.interdomain.to_qemu = 1;
1400 lp->u.interdomain.port = interdomain->remote_port;
1401 ret = 0;
1402 } else {
1403 /* Loopback */
1404 XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1405 XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1406
1407 /*
1408 * The 'remote' port for loopback must be an unbound port allocated
1409 * for communication with the local domain, and must *not* be the
1410 * port that was just allocated for the local end.
1411 */
1412 if (interdomain->local_port != interdomain->remote_port &&
1413 rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {
1414
1415 rp->type = EVTCHNSTAT_interdomain;
1416 rp->u.interdomain.port = interdomain->local_port;
1417
1418 lp->type = EVTCHNSTAT_interdomain;
1419 lp->u.interdomain.port = interdomain->remote_port;
1420 } else {
1421 ret = -EINVAL;
1422 }
1423 }
1424
1425 out_free_port:
1426 if (ret) {
1427 free_port(s, interdomain->local_port);
1428 }
1429 out:
1430 qemu_mutex_unlock(&s->port_lock);
1431
1432 return ret;
1433
1434 }
1435 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1436 {
1437 XenEvtchnState *s = xen_evtchn_singleton;
1438 int ret;
1439
1440 if (!s) {
1441 return -ENOTSUP;
1442 }
1443
1444 if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1445 return -ESRCH;
1446 }
1447
1448 if (alloc->remote_dom != DOMID_QEMU &&
1449 alloc->remote_dom != DOMID_SELF &&
1450 alloc->remote_dom != xen_domid) {
1451 return -EPERM;
1452 }
1453
1454 qemu_mutex_lock(&s->port_lock);
1455
1456 ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);
1457
1458 if (!ret && alloc->remote_dom == DOMID_QEMU) {
1459 XenEvtchnPort *p = &s->port_table[alloc->port];
1460 p->u.interdomain.to_qemu = 1;
1461 }
1462
1463 qemu_mutex_unlock(&s->port_lock);
1464
1465 return ret;
1466 }
1467
1468 int xen_evtchn_send_op(struct evtchn_send *send)
1469 {
1470 XenEvtchnState *s = xen_evtchn_singleton;
1471 XenEvtchnPort *p;
1472 int ret = 0;
1473
1474 if (!s) {
1475 return -ENOTSUP;
1476 }
1477
1478 if (!valid_port(send->port)) {
1479 return -EINVAL;
1480 }
1481
1482 qemu_mutex_lock(&s->port_lock);
1483
1484 p = &s->port_table[send->port];
1485
1486 switch (p->type) {
1487 case EVTCHNSTAT_interdomain:
1488 if (p->u.interdomain.to_qemu) {
1489 /*
1490 * This is an event from the guest to qemu itself, which is
1491 * serving as the driver domain.
1492 */
1493 uint16_t be_port = p->u.interdomain.port;
1494 struct xenevtchn_handle *xc = s->be_handles[be_port];
1495 if (xc) {
1496 eventfd_write(xc->fd, 1);
1497 ret = 0;
1498 } else {
1499 ret = -ENOENT;
1500 }
1501 } else {
1502 /* Loopback interdomain ports; just a complex IPI */
1503 set_port_pending(s, p->u.interdomain.port);
1504 }
1505 break;
1506
1507 case EVTCHNSTAT_ipi:
1508 set_port_pending(s, send->port);
1509 break;
1510
1511 case EVTCHNSTAT_unbound:
1512 /* Xen will silently drop these */
1513 break;
1514
1515 default:
1516 ret = -EINVAL;
1517 break;
1518 }
1519
1520 qemu_mutex_unlock(&s->port_lock);
1521
1522 return ret;
1523 }
1524
1525 int xen_evtchn_set_port(uint16_t port)
1526 {
1527 XenEvtchnState *s = xen_evtchn_singleton;
1528 XenEvtchnPort *p;
1529 int ret = -EINVAL;
1530
1531 if (!s) {
1532 return -ENOTSUP;
1533 }
1534
1535 if (!valid_port(port)) {
1536 return -EINVAL;
1537 }
1538
1539 qemu_mutex_lock(&s->port_lock);
1540
1541 p = &s->port_table[port];
1542
1543 /* QEMU has no business sending to anything but these */
1544 if (p->type == EVTCHNSTAT_virq ||
1545 (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
1546 set_port_pending(s, port);
1547 ret = 0;
1548 }
1549
1550 qemu_mutex_unlock(&s->port_lock);
1551
1552 return ret;
1553 }
1554
1555 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1556 {
1557 uint16_t pirq;
1558
1559 /*
1560 * Preserve the allocation strategy that Xen has. It looks like
1561 * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1562 * to GSIs (counting up from 16), and then we count backwards from
1563 * the top for MSIs or when the GSI space is exhausted.
1564 */
1565 if (type == MAP_PIRQ_TYPE_GSI) {
1566 for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1567 if (pirq_inuse(s, pirq)) {
1568 continue;
1569 }
1570
1571 /* Found it */
1572 goto found;
1573 }
1574 }
1575 for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1576 /* Skip whole words at a time when they're full */
1577 if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1578 pirq &= ~63ULL;
1579 continue;
1580 }
1581 if (pirq_inuse(s, pirq)) {
1582 continue;
1583 }
1584
1585 goto found;
1586 }
1587 return -ENOSPC;
1588
1589 found:
1590 pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1591 if (gsi >= 0) {
1592 assert(gsi < IOAPIC_NUM_PINS);
1593 s->gsi_pirq[gsi] = pirq;
1594 }
1595 s->pirq[pirq].gsi = gsi;
1596 return pirq;
1597 }
1598
1599 bool xen_evtchn_set_gsi(int gsi, int level)
1600 {
1601 XenEvtchnState *s = xen_evtchn_singleton;
1602 int pirq;
1603
1604 assert(bql_locked());
1605
1606 if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1607 return false;
1608 }
1609
1610 /*
1611 * Check that it *isn't* the event channel GSI, and thus
1612 * that we are not recursing and it's safe to take s->port_lock.
1613 *
1614 * Locking aside, it's perfectly sane to bail out early for that
1615 * special case, as it would make no sense for the event channel
1616 * GSI to be routed back to event channels, when the delivery
1617 * method is to raise the GSI... that recursion wouldn't *just*
1618 * be a locking issue.
1619 */
1620 if (gsi && gsi == s->callback_gsi) {
1621 return false;
1622 }
1623
1624 QEMU_LOCK_GUARD(&s->port_lock);
1625
1626 pirq = s->gsi_pirq[gsi];
1627 if (!pirq) {
1628 return false;
1629 }
1630
1631 if (level) {
1632 int port = s->pirq[pirq].port;
1633
1634 s->pirq_gsi_set |= (1U << gsi);
1635 if (port) {
1636 set_port_pending(s, port);
1637 }
1638 } else {
1639 s->pirq_gsi_set &= ~(1U << gsi);
1640 }
1641 return true;
1642 }
1643
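/*
 * Decode the Xen PIRQ# from an MSI message: the vector (data bits 0-7)
 * must be zero, the low bits of the PIRQ# live in address bits 12-19 and
 * the remaining bits in the upper 32 bits of the address.
 */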
1644 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1645 {
1646 /* The vector (in low 8 bits of data) must be zero */
1647 if (data & 0xff) {
1648 return 0;
1649 }
1650
1651 uint32_t pirq = (addr & 0xff000) >> 12;
1652 pirq |= (addr >> 32) & 0xffffff00;
1653
1654 return pirq;
1655 }
1656
1657 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1658 int except_pirq)
1659 {
1660 uint32_t pirq;
1661
1662 for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1663 /*
1664 * We could be cleverer here, but it isn't really a fast path, and
1665 * this trivial optimisation is enough to let us skip the big gap
1666 * in the middle a bit quicker (in terms of both loop iterations,
1667 * and cache lines).
1668 */
1669 if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1670 pirq += 64;
1671 continue;
1672 }
1673 if (except_pirq && pirq == except_pirq) {
1674 continue;
1675 }
1676 if (s->pirq[pirq].dev != dev) {
1677 continue;
1678 }
1679 if (vector != -1 && s->pirq[pirq].vector != vector) {
1680 continue;
1681 }
1682
1683 /* It could theoretically be bound to a port already, but that is OK. */
1684 s->pirq[pirq].dev = dev;
1685 s->pirq[pirq].gsi = IRQ_UNBOUND;
1686 s->pirq[pirq].is_msix = false;
1687 s->pirq[pirq].vector = 0;
1688 s->pirq[pirq].is_masked = false;
1689 s->pirq[pirq].is_translated = false;
1690 }
1691 }
1692
1693 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1694 {
1695 XenEvtchnState *s = xen_evtchn_singleton;
1696
1697 if (!s) {
1698 return;
1699 }
1700
1701 QEMU_LOCK_GUARD(&s->port_lock);
1702 do_remove_pci_vector(s, dev, -1, 0);
1703 }
1704
1705 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1706 uint64_t addr, uint32_t data, bool is_masked)
1707 {
1708 XenEvtchnState *s = xen_evtchn_singleton;
1709 uint32_t pirq;
1710
1711 if (!s) {
1712 return;
1713 }
1714
1715 assert(bql_locked());
1716
1717 pirq = msi_pirq_target(addr, data);
1718
1719 /*
1720 * The PIRQ# must be sane, and there must be an allocated PIRQ in
1721 * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1722 */
1723 if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1724 (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1725 s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1726 pirq = 0;
1727 }
1728
1729 if (pirq) {
1730 s->pirq[pirq].dev = dev;
1731 s->pirq[pirq].gsi = IRQ_MSI_EMU;
1732 s->pirq[pirq].is_msix = is_msix;
1733 s->pirq[pirq].vector = vector;
1734 s->pirq[pirq].is_masked = is_masked;
1735 }
1736
1737 /* Remove any (other) entries for this {device, vector} */
1738 do_remove_pci_vector(s, dev, vector, pirq);
1739 }
1740
1741 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1742 uint64_t address, uint32_t data)
1743 {
1744 XenEvtchnState *s = xen_evtchn_singleton;
1745 uint32_t pirq, port;
1746 CPUState *cpu;
1747
1748 if (!s) {
1749 return 1; /* Not a PIRQ */
1750 }
1751
1752 assert(bql_locked());
1753
1754 pirq = msi_pirq_target(address, data);
1755 if (!pirq || pirq >= s->nr_pirqs) {
1756 return 1; /* Not a PIRQ */
1757 }
1758
1759 if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1760 return -ENOTSUP;
1761 }
1762
1763 if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1764 return -EINVAL;
1765 }
1766
1767 /* Remember that KVM tried to translate this. It might need to try again. */
1768 s->pirq[pirq].is_translated = true;
1769
1770 QEMU_LOCK_GUARD(&s->port_lock);
1771
1772 port = s->pirq[pirq].port;
1773 if (!valid_port(port)) {
1774 return -EINVAL;
1775 }
1776
1777 cpu = qemu_get_cpu(s->port_table[port].vcpu);
1778 if (!cpu) {
1779 return -EINVAL;
1780 }
1781
1782 route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1783 route->u.xen_evtchn.port = port;
1784 route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1785 route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1786
1787 return 0; /* Handled */
1788 }
1789
1790 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1791 {
1792 XenEvtchnState *s = xen_evtchn_singleton;
1793 uint32_t pirq, port;
1794
1795 if (!s) {
1796 return false;
1797 }
1798
1799 assert(bql_locked());
1800
1801 pirq = msi_pirq_target(address, data);
1802 if (!pirq || pirq >= s->nr_pirqs) {
1803 return false;
1804 }
1805
1806 QEMU_LOCK_GUARD(&s->port_lock);
1807
1808 port = s->pirq[pirq].port;
1809 if (!valid_port(port)) {
1810 return false;
1811 }
1812
1813 set_port_pending(s, port);
1814 return true;
1815 }
1816
1817 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1818 {
1819 XenEvtchnState *s = xen_evtchn_singleton;
1820 int pirq = map->pirq;
1821 int gsi = map->index;
1822
1823 if (!s) {
1824 return -ENOTSUP;
1825 }
1826
1827 BQL_LOCK_GUARD();
1828 QEMU_LOCK_GUARD(&s->port_lock);
1829
1830 if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1831 return -EPERM;
1832 }
1833 if (map->type != MAP_PIRQ_TYPE_GSI) {
1834 return -EINVAL;
1835 }
1836 if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1837 return -EINVAL;
1838 }
1839
1840 if (pirq < 0) {
1841 pirq = allocate_pirq(s, map->type, gsi);
1842 if (pirq < 0) {
1843 return pirq;
1844 }
1845 map->pirq = pirq;
1846 } else if (pirq >= s->nr_pirqs) {
1847 return -EINVAL;
1848 } else {
1849 /*
1850 * User specified a valid-looking PIRQ#. Allow it if it is
1851 * allocated and not yet bound, or if it is unallocated
1852 */
1853 if (pirq_inuse(s, pirq)) {
1854 if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1855 return -EBUSY;
1856 }
1857 } else {
1858 /* If it was unused, mark it used now. */
1859 pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1860 }
1861 /* Set the mapping in both directions. */
1862 s->pirq[pirq].gsi = gsi;
1863 s->gsi_pirq[gsi] = pirq;
1864 }
1865
1866 trace_kvm_xen_map_pirq(pirq, gsi);
1867 return 0;
1868 }
1869
1870 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1871 {
1872 XenEvtchnState *s = xen_evtchn_singleton;
1873 int pirq = unmap->pirq;
1874 int gsi;
1875
1876 if (!s) {
1877 return -ENOTSUP;
1878 }
1879
1880 if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1881 return -EPERM;
1882 }
1883 if (pirq < 0 || pirq >= s->nr_pirqs) {
1884 return -EINVAL;
1885 }
1886
1887 BQL_LOCK_GUARD();
1888 qemu_mutex_lock(&s->port_lock);
1889
1890 if (!pirq_inuse(s, pirq)) {
1891 qemu_mutex_unlock(&s->port_lock);
1892 return -ENOENT;
1893 }
1894
1895 gsi = s->pirq[pirq].gsi;
1896
1897 /* We can only unmap GSI PIRQs */
1898 if (gsi < 0) {
1899 qemu_mutex_unlock(&s->port_lock);
1900 return -EINVAL;
1901 }
1902
1903 s->gsi_pirq[gsi] = 0;
1904 s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1905 pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1906
1907 trace_kvm_xen_unmap_pirq(pirq, gsi);
1908 qemu_mutex_unlock(&s->port_lock);
1909
1910 if (gsi == IRQ_MSI_EMU) {
1911 kvm_update_msi_routes_all(NULL, true, 0, 0);
1912 }
1913
1914 return 0;
1915 }
1916
1917 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1918 {
1919 XenEvtchnState *s = xen_evtchn_singleton;
1920 int pirq = eoi->irq;
1921 int gsi;
1922
1923 if (!s) {
1924 return -ENOTSUP;
1925 }
1926
1927 BQL_LOCK_GUARD();
1928 QEMU_LOCK_GUARD(&s->port_lock);
1929
1930 if (!pirq_inuse(s, pirq)) {
1931 return -ENOENT;
1932 }
1933
1934 gsi = s->pirq[pirq].gsi;
1935 if (gsi < 0) {
1936 return -EINVAL;
1937 }
1938
1939 /* Reassert a level IRQ if needed */
1940 if (s->pirq_gsi_set & (1U << gsi)) {
1941 int port = s->pirq[pirq].port;
1942 if (port) {
1943 set_port_pending(s, port);
1944 }
1945 }
1946
1947 return 0;
1948 }
1949
1950 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1951 {
1952 XenEvtchnState *s = xen_evtchn_singleton;
1953 int pirq = query->irq;
1954
1955 if (!s) {
1956 return -ENOTSUP;
1957 }
1958
1959 BQL_LOCK_GUARD();
1960 QEMU_LOCK_GUARD(&s->port_lock);
1961
1962 if (!pirq_inuse(s, pirq)) {
1963 return -ENOENT;
1964 }
1965
1966 if (s->pirq[pirq].gsi >= 0) {
1967 query->flags = XENIRQSTAT_needs_eoi;
1968 } else {
1969 query->flags = 0;
1970 }
1971
1972 return 0;
1973 }
1974
1975 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1976 {
1977 XenEvtchnState *s = xen_evtchn_singleton;
1978 int pirq;
1979
1980 if (!s) {
1981 return -ENOTSUP;
1982 }
1983
1984 QEMU_LOCK_GUARD(&s->port_lock);
1985
1986 pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1987 if (pirq < 0) {
1988 return pirq;
1989 }
1990
1991 get->pirq = pirq;
1992 trace_kvm_xen_get_free_pirq(pirq, get->type);
1993 return 0;
1994 }
1995
1996 struct xenevtchn_handle *xen_be_evtchn_open(void)
1997 {
1998 struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
1999
2000 xc->fd = eventfd(0, EFD_CLOEXEC);
2001 if (xc->fd < 0) {
2002 g_free(xc);
2003 return NULL;
2004 }
2005
2006 return xc;
2007 }
2008
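/* Grab the first free backend port number and associate it with @xc. */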
2009 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2010 {
2011 int i;
2012
2013 for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2014 if (!s->be_handles[i]) {
2015 s->be_handles[i] = xc;
2016 xc->be_port = i;
2017 return i;
2018 }
2019 }
2020 return 0;
2021 }
2022
2023 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2024 evtchn_port_t guest_port)
2025 {
2026 XenEvtchnState *s = xen_evtchn_singleton;
2027 XenEvtchnPort *gp;
2028 uint16_t be_port = 0;
2029 int ret;
2030
2031 if (!s) {
2032 return -ENOTSUP;
2033 }
2034
2035 if (!xc) {
2036 return -EFAULT;
2037 }
2038
2039 if (domid != xen_domid) {
2040 return -ESRCH;
2041 }
2042
2043 if (!valid_port(guest_port)) {
2044 return -EINVAL;
2045 }
2046
2047 qemu_mutex_lock(&s->port_lock);
2048
2049 /* The guest has to have an unbound port waiting for us to bind */
2050 gp = &s->port_table[guest_port];
2051
2052 switch (gp->type) {
2053 case EVTCHNSTAT_interdomain:
2054 /* Allow rebinding after migration, preserve port # if possible */
2055 be_port = gp->u.interdomain.port;
2056 assert(be_port != 0);
2057 if (!s->be_handles[be_port]) {
2058 s->be_handles[be_port] = xc;
2059 xc->guest_port = guest_port;
2060 ret = xc->be_port = be_port;
2061 if (kvm_xen_has_cap(EVTCHN_SEND)) {
2062 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2063 }
2064 break;
2065 }
2066 /* fall through */
2067
2068 case EVTCHNSTAT_unbound:
2069 be_port = find_be_port(s, xc);
2070 if (!be_port) {
2071 ret = -ENOSPC;
2072 goto out;
2073 }
2074
2075 gp->type = EVTCHNSTAT_interdomain;
2076 gp->u.interdomain.to_qemu = 1;
2077 gp->u.interdomain.port = be_port;
2078 xc->guest_port = guest_port;
2079 if (kvm_xen_has_cap(EVTCHN_SEND)) {
2080 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2081 }
2082 ret = be_port;
2083 break;
2084
2085 default:
2086 ret = -EINVAL;
2087 break;
2088 }
2089
2090 out:
2091 qemu_mutex_unlock(&s->port_lock);
2092
2093 return ret;
2094 }
2095
2096 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2097 {
2098 XenEvtchnState *s = xen_evtchn_singleton;
2099 int ret;
2100
2101 if (!s) {
2102 return -ENOTSUP;
2103 }
2104
2105 if (!xc) {
2106 return -EFAULT;
2107 }
2108
2109 qemu_mutex_lock(&s->port_lock);
2110
2111 if (port && port != xc->be_port) {
2112 ret = -EINVAL;
2113 goto out;
2114 }
2115
2116 if (xc->guest_port) {
2117 XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2118
2119 /* This should never *not* be true */
2120 if (gp->type == EVTCHNSTAT_interdomain) {
2121 gp->type = EVTCHNSTAT_unbound;
2122 gp->u.interdomain.port = 0;
2123 }
2124
2125 if (kvm_xen_has_cap(EVTCHN_SEND)) {
2126 deassign_kernel_port(xc->guest_port);
2127 }
2128 xc->guest_port = 0;
2129 }
2130
2131 s->be_handles[xc->be_port] = NULL;
2132 xc->be_port = 0;
2133 ret = 0;
2134 out:
2135 qemu_mutex_unlock(&s->port_lock);
2136 return ret;
2137 }
2138
2139 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2140 {
2141 if (!xc) {
2142 return -EFAULT;
2143 }
2144
2145 xen_be_evtchn_unbind(xc, 0);
2146
2147 close(xc->fd);
2148 g_free(xc);
2149 return 0;
2150 }
2151
2152 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2153 {
2154 if (!xc) {
2155 return -1;
2156 }
2157 return xc->fd;
2158 }
2159
2160 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2161 {
2162 XenEvtchnState *s = xen_evtchn_singleton;
2163 int ret;
2164
2165 if (!s) {
2166 return -ENOTSUP;
2167 }
2168
2169 if (!xc) {
2170 return -EFAULT;
2171 }
2172
2173 qemu_mutex_lock(&s->port_lock);
2174
2175 if (xc->guest_port) {
2176 set_port_pending(s, xc->guest_port);
2177 ret = 0;
2178 } else {
2179 ret = -ENOTCONN;
2180 }
2181
2182 qemu_mutex_unlock(&s->port_lock);
2183
2184 return ret;
2185 }
2186
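/*
 * Consume the eventfd counter; a non-zero read means the guest has
 * notified this backend port since we last looked.
 */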
2187 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2188 {
2189 uint64_t val;
2190
2191 if (!xc) {
2192 return -EFAULT;
2193 }
2194
2195 if (!xc->be_port) {
2196 return 0;
2197 }
2198
2199 if (eventfd_read(xc->fd, &val)) {
2200 return -errno;
2201 }
2202
2203 return val ? xc->be_port : 0;
2204 }
2205
2206 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2207 {
2208 if (!xc) {
2209 return -EFAULT;
2210 }
2211
2212 if (xc->be_port != port) {
2213 return -EINVAL;
2214 }
2215
2216 /*
2217 * We don't actually do anything to unmask it; the event was already
2218 * consumed in xen_be_evtchn_pending().
2219 */
2220 return 0;
2221 }
2222
2223 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2224 {
2225 return xc->guest_port;
2226 }
2227
2228 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2229 {
2230 XenEvtchnState *s = xen_evtchn_singleton;
2231 EvtchnInfoList *head = NULL, **tail = &head;
2232 void *shinfo, *pending, *mask;
2233 int i;
2234
2235 if (!s) {
2236 error_setg(errp, "Xen event channel emulation not enabled");
2237 return NULL;
2238 }
2239
2240 shinfo = xen_overlay_get_shinfo_ptr();
2241 if (!shinfo) {
2242 error_setg(errp, "Xen shared info page not allocated");
2243 return NULL;
2244 }
2245
2246 if (xen_is_long_mode()) {
2247 pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2248 mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2249 } else {
2250 pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2251 mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2252 }
2253
2254 QEMU_LOCK_GUARD(&s->port_lock);
2255
2256 for (i = 0; i < s->nr_ports; i++) {
2257 XenEvtchnPort *p = &s->port_table[i];
2258 EvtchnInfo *info;
2259
2260 if (p->type == EVTCHNSTAT_closed) {
2261 continue;
2262 }
2263
2264 info = g_new0(EvtchnInfo, 1);
2265
2266 info->port = i;
2267 qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2268 qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2269 qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2270 qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2271 qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2272 qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2273
2274 info->type = p->type;
2275 if (p->type == EVTCHNSTAT_interdomain) {
2276 info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
2277 "qemu" : "loopback");
2278 info->target = p->u.interdomain.port;
2279 } else {
2280 info->target = p->u.val; /* pirq# or virq# */
2281 }
2282 info->vcpu = p->vcpu;
2283 info->pending = test_bit(i, pending);
2284 info->masked = test_bit(i, mask);
2285
2286 QAPI_LIST_APPEND(tail, info);
2287 }
2288
2289 return head;
2290 }
2291
2292 void qmp_xen_event_inject(uint32_t port, Error **errp)
2293 {
2294 XenEvtchnState *s = xen_evtchn_singleton;
2295
2296 if (!s) {
2297 error_setg(errp, "Xen event channel emulation not enabled");
2298 return;
2299 }
2300
2301 if (!valid_port(port)) {
2302 error_setg(errp, "Invalid port %u", port);
return;
2303 }
2304
2305 QEMU_LOCK_GUARD(&s->port_lock);
2306
2307 if (set_port_pending(s, port)) {
2308 error_setg(errp, "Failed to set port %u", port);
2309 return;
2310 }
2311 }
2312
2313 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2314 {
2315 EvtchnInfoList *iter, *info_list;
2316 Error *err = NULL;
2317
2318 info_list = qmp_xen_event_list(&err);
2319 if (err) {
2320 hmp_handle_error(mon, err);
2321 return;
2322 }
2323
2324 for (iter = info_list; iter; iter = iter->next) {
2325 EvtchnInfo *info = iter->value;
2326
2327 monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2328 EvtchnPortType_str(info->type));
2329 if (info->type != EVTCHN_PORT_TYPE_IPI) {
2330 monitor_printf(mon, "(");
2331 if (info->remote_domain) {
2332 monitor_printf(mon, "%s:", info->remote_domain);
2333 }
2334 monitor_printf(mon, "%d)", info->target);
2335 }
2336 if (info->pending) {
2337 monitor_printf(mon, " PENDING");
2338 }
2339 if (info->masked) {
2340 monitor_printf(mon, " MASKED");
2341 }
2342 monitor_printf(mon, "\n");
2343 }
2344
2345 qapi_free_EvtchnInfoList(info_list);
2346 }
2347
2348 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2349 {
2350 int port = qdict_get_int(qdict, "port");
2351 Error *err = NULL;
2352
2353 qmp_xen_event_inject(port, &err);
2354 if (err) {
2355 hmp_handle_error(mon, err);
2356 } else {
2357 monitor_printf(mon, "Delivered port %d\n", port);
2358 }
2359 }
2360
2361