/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    QemuMutex gnt_lock;

    uint32_t nr_frames;
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    uint64_t *gnt_frame_gpas;

    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->nr_frames = 0;
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    uint32_t i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

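/*
 * Only nr_frames and the guest physical addresses at which the guest mapped
 * each grant table frame are recorded here; xen_gnttab_post_load() uses them
 * to re-establish the overlay mappings after migration.
 */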
static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name          = TYPE_XEN_GNTTAB,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init    = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

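/*
 * Everything below implements the emulated grant mapping backend:
 * xen_gnttab_realize() points xen_gnttab_ops at emu_gnttab_backend_ops, so
 * QEMU's built-in PV backends can map, unmap and copy grants directly from
 * guest memory instead of going through a real Xen hypervisor.
 */
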
/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;
    void *virtaddr;
    uint32_t refcnt;
    int prot;
};

static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

struct xengntdev_handle {
    GHashTable *active_maps;
};

static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

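/*
 * Map a single grant reference from the guest and return a host pointer to
 * it. Repeated mappings of the same ref share one struct active_ref and are
 * refcounted, and a read-only mapping is upgraded in place if a later caller
 * asks for PROT_WRITE. Only single-page mappings are supported; see the
 * comment below.
 */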
static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

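        /* Map the granted page, copy in the right direction, then unmap. */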
        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}

static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};

int xen_gnttab_reset(void)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    s->nr_frames = 0;

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);

    return 0;
}
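
/*
 * Illustrative sketch (not used by this file): a PV backend driving the ops
 * installed above would map and release a single grant roughly like this,
 * assuming it reaches them through xen_gnttab_ops (in practice via the
 * qemu_xen_gnttab_*() wrappers in hw/xen/xen_backend_ops.h):
 *
 *     struct xengntdev_handle *xgt = xen_gnttab_ops->open();
 *     uint32_t ref = ...;    // grant reference supplied by the frontend
 *     void *p = xen_gnttab_ops->map_refs(xgt, 1, xen_domid, &ref,
 *                                        PROT_READ | PROT_WRITE);
 *     if (p) {
 *         // ... use the shared page ...
 *         xen_gnttab_ops->unmap(xgt, p, &ref, 1);
 *     }
 *     xen_gnttab_ops->close(xgt);
 */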