/*
 * QEMU Xen emulation: Shared/overlay pages support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define XEN_PAGE_SHIFT 12
#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))

struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;
};

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
                       xen_xenstore_event, NULL, NULL, NULL, s);
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }
    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }
    return 0;
}

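/*
 * Only guest_port is migrated below; be_port is deliberately absent from
 * the field list, since xen_xenstore_post_load() re-derives it by
 * rebinding to the guest's port on the destination.
 */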
static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name = TYPE_XEN_XENSTORE,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}

static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

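/*
 * Actual XenStore request processing is not implemented here; every
 * fully received request is currently answered with an XS_ERROR
 * response carrying "ENOSYS", which put_rsp() then pushes back onto
 * the ring.
 */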
static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char enosys[] = "ENOSYS";

    assert(req_pending(s));
    assert(!s->rsp_pending);

    rsp->type = XS_ERROR;
    rsp->req_id = req->req_id;
    rsp->tx_id = req->tx_id;
    rsp->len = sizeof(enosys);
    memcpy((void *)&rsp[1], enosys, sizeof(enosys));

    s->rsp_pending = true;
    reset_req(s);
}

static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}

static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_to_ring() (or the guest's
     * equivalent) between writing the data to the ring and updating
     * rsp_prod. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}

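/*
 * Assemble the next request in two steps: read the fixed-size header
 * first (to learn req->len), then the payload. A request claiming a
 * payload larger than XENSTORE_PAYLOAD_MAX is a fatal protocol error.
 */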
static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}

static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

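/*
 * Deferred init (see the comment in xen_xenstore_create()): this cannot
 * run until KVM is set up and the overlay page can be mapped.
 */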
int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    return 0;
}