/*
 * QEMU Xen emulation: Shared/overlay pages support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define XEN_PAGE_SHIFT 12
#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))

struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;
};

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
                       xen_xenstore_event, NULL, NULL, NULL, s);
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }
    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }
    return 0;
}
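
/*
 * Migration must preserve the partially-assembled request and response
 * buffers along with the ring offsets, so that a message split across a
 * migration is reassembled correctly on the destination. Note that
 * be_port is deliberately not saved; it is re-established by binding to
 * guest_port in xen_xenstore_post_load().
 */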
static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name = TYPE_XEN_XENSTORE,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}

static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

/*
 * Stub request handler: for now, every request is answered with an
 * XS_ERROR message carrying the payload "ENOSYS". A full implementation
 * would dispatch on req->type.
 */
static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char enosys[] = "ENOSYS";

    assert(req_pending(s));
    assert(!s->rsp_pending);

    rsp->type = XS_ERROR;
    rsp->req_id = req->req_id;
    rsp->tx_id = req->tx_id;
    rsp->len = sizeof(enosys);
    memcpy((void *)&rsp[1], enosys, sizeof(enosys));

    s->rsp_pending = true;
    reset_req(s);
}
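
/*
 * Copy up to @len bytes out of the request ring into @ptr, advancing
 * req_cons as we go. Returns the number of bytes actually copied, which
 * may be less than @len if the ring runs dry; a copy is split in two
 * where it crosses the wrap point of the circular buffer.
 */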
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;
        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
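
/*
 * Copy up to @len bytes from @ptr into the response ring, advancing
 * rsp_prod as we go. Returns the number of bytes copied; the caller
 * keeps any remainder queued in rsp_data and tries again once the
 * guest has made space in the ring.
 */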
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_from_ring() (or the guest's
     * equivalent) between reading the data from the ring and updating
     * the consumer index. It protects against the pathological case
     * (which again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;
        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}

static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}

static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

/*
 * Event channel handler: keep pumping data in both directions until the
 * rings are quiescent, i.e. nothing more can be copied either way and no
 * complete request is awaiting processing.
 */
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    return 0;
}