/*
 * Copyright (C) 2010 Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "cpu.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"
#include "hw/i386/pc.h"
#include "hw/irq.h"
#include "hw/i386/apic-msidef.h"
#include "hw/xen/xen_common.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#include "exec/address-spaces.h"

#include <xen/hvm/ioreq.h>
#include <xen/hvm/e820.h>

//#define DEBUG_XEN_HVM

#ifdef DEBUG_XEN_HVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
static MemoryRegion *framebuffer;
static bool xen_in_migration;

/* Compatibility with older version */

/* This allows QEMU to build on a system that has Xen 4.5 or earlier
 * installed.  This is here (not in hw/xen/xen_common.h) because
 * xen/hvm/ioreq.h needs to be included before this block and
 * hw/xen/xen_common.h needs to be included before xen/hvm/ioreq.h
 */
#ifndef IOREQ_TYPE_VMWARE_PORT
#define IOREQ_TYPE_VMWARE_PORT  3
struct vmware_regs {
    uint32_t esi;
    uint32_t edi;
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
};
typedef struct vmware_regs vmware_regs_t;

struct shared_vmport_iopage {
    struct vmware_regs vcpu_vmport_regs[1];
};
typedef struct shared_vmport_iopage shared_vmport_iopage_t;
#endif

static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
{
    return shared_page->vcpu_ioreq[i].vp_eport;
}
static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
{
    return &shared_page->vcpu_ioreq[vcpu];
}

#define BUFFER_IO_MAX_DELAY  100

typedef struct XenPhysmap {
    hwaddr start_addr;
    ram_addr_t size;
    const char *name;
    hwaddr phys_offset;

    QLIST_ENTRY(XenPhysmap) list;
} XenPhysmap;

static QLIST_HEAD(, XenPhysmap) xen_physmap;

typedef struct XenPciDevice {
    PCIDevice *pci_dev;
    uint32_t sbdf;
    QLIST_ENTRY(XenPciDevice) entry;
} XenPciDevice;

typedef struct XenIOState {
    ioservid_t ioservid;
    shared_iopage_t *shared_page;
    shared_vmport_iopage_t *shared_vmport_page;
    buffered_iopage_t *buffered_io_page;
    QEMUTimer *buffered_io_timer;
    CPUState **cpu_by_vcpu_id;
    /* the evtchn port for polling the notification */
    evtchn_port_t *ioreq_local_port;
    /* evtchn remote and local ports for buffered io */
    evtchn_port_t bufioreq_remote_port;
    evtchn_port_t bufioreq_local_port;
    /* the evtchn fd for polling */
    xenevtchn_handle *xce_handle;
    /* which vcpu we are serving */
    int send_vcpu;

    struct xs_handle *xenstore;
    MemoryListener memory_listener;
    MemoryListener io_listener;
    QLIST_HEAD(, XenPciDevice) dev_list;
    DeviceListener device_listener;
    hwaddr free_phys_offset;
    const XenPhysmap *log_for_dirtybit;
    /* Buffer used by xen_sync_dirty_bitmap */
    unsigned long *dirty_bitmap;

    Notifier exit;
    Notifier suspend;
    Notifier wakeup;
} XenIOState;

/* Xen specific function for piix pci */

int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
{
    return irq_num + ((pci_dev->devfn >> 3) << 2);
}

void xen_piix3_set_irq(void *opaque, int irq_num, int level)
{
    xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
                           irq_num & 3, level);
}

void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
{
    int i;

    /* Scan for updates to PCI link routes (0x60-0x63). */
    for (i = 0; i < len; i++) {
        uint8_t v = (val >> (8 * i)) & 0xff;
        if (v & 0x80) {
            v = 0;
        }
        v &= 0xf;
        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
            xen_set_pci_link_route(xen_domid, address + i - 0x60, v);
        }
    }
}

int xen_is_pirq_msi(uint32_t msi_data)
{
    /* If vector is 0, the msi is remapped into a pirq, passed as
     * dest_id.
     */
    return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
}

void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
{
    xen_inject_msi(xen_domid, addr, data);
}

static void xen_suspend_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
}

/* Xen Interrupt Controller */

static void xen_set_irq(void *opaque, int irq, int level)
{
    xen_set_isa_irq_level(xen_domid, irq, level);
}

qemu_irq *xen_interrupt_controller_init(void)
{
    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
}

/* Memory Ops */

static void xen_ram_init(PCMachineState *pcms,
                         ram_addr_t ram_size, MemoryRegion **ram_memory_p)
{
    MemoryRegion *sysmem = get_system_memory();
    ram_addr_t block_len;
    uint64_t user_lowmem = object_property_get_uint(qdev_get_machine(),
                                                    PC_MACHINE_MAX_RAM_BELOW_4G,
                                                    &error_abort);

    /* Handle the machine opt max-ram-below-4g.  It is basically doing
     * min(xen limit, user limit).
     */
    if (!user_lowmem) {
        user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
    }
    if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
        user_lowmem = HVM_BELOW_4G_RAM_END;
    }

    if (ram_size >= user_lowmem) {
        pcms->above_4g_mem_size = ram_size - user_lowmem;
        pcms->below_4g_mem_size = user_lowmem;
    } else {
        pcms->above_4g_mem_size = 0;
        pcms->below_4g_mem_size = ram_size;
    }
    if (!pcms->above_4g_mem_size) {
        block_len = ram_size;
    } else {
        /*
         * Xen does not allocate the memory contiguously, it keeps a
         * hole of the size computed above or passed in.
         */
        block_len = (1ULL << 32) + pcms->above_4g_mem_size;
    }
    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
                           &error_fatal);
    *ram_memory_p = &ram_memory;

    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                             &ram_memory, 0, 0xa0000);
    memory_region_add_subregion(sysmem, 0, &ram_640k);
    /* Skip the VGA IO memory space, it will be registered later by the VGA
     * emulated device.
     *
     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
     * the Options ROM, so it is registered here as RAM.
     */
    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
                             &ram_memory, 0xc0000,
                             pcms->below_4g_mem_size - 0xc0000);
    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
    if (pcms->above_4g_mem_size > 0) {
        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
                                 &ram_memory, 0x100000000ULL,
                                 pcms->above_4g_mem_size);
        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
    }
}

void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
                __func__, size, ram_addr);
        return;
    }

    if (mr == &ram_memory) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> TARGET_PAGE_BITS;
    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}

static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size)
{
    XenPhysmap *physmap = NULL;

    start_addr &= TARGET_PAGE_MASK;

    QLIST_FOREACH(physmap, &xen_physmap, list) {
        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
            return physmap;
        }
    }
    return NULL;
}

static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size)
{
    hwaddr addr = phys_offset & TARGET_PAGE_MASK;
    XenPhysmap *physmap = NULL;

    QLIST_FOREACH(physmap, &xen_physmap, list) {
        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
            return physmap->start_addr + (phys_offset - physmap->phys_offset);
        }
    }

    return phys_offset;
}

#ifdef XEN_COMPAT_PHYSMAP
static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
{
    char path[80], value[17];

    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
             xen_domid, (uint64_t)physmap->phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->start_addr);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
             xen_domid, (uint64_t)physmap->phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->size);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    if (physmap->name) {
        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
                 xen_domid, (uint64_t)physmap->phys_offset);
        if (!xs_write(state->xenstore, 0, path,
                      physmap->name, strlen(physmap->name))) {
            return -1;
        }
    }
    return 0;
}
#else
static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
{
    return 0;
}
#endif

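/*
 * Establish a guest physical mapping for a RAM region: ask Xen to
 * relocate the pages backing @mr so they appear at @start_addr, record
 * the mapping in xen_physmap, and (with XEN_COMPAT_PHYSMAP) persist it
 * to xenstore via xen_save_physmap() so xen_read_physmap() can recover
 * it later.
 */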
static int xen_add_to_physmap(XenIOState *state,
                              hwaddr start_addr,
                              ram_addr_t size,
                              MemoryRegion *mr,
                              hwaddr offset_within_region)
{
    unsigned long nr_pages;
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr pfn, start_gpfn;
    hwaddr phys_offset = memory_region_get_ram_addr(mr);
    const char *mr_name;

    if (get_physmapping(start_addr, size)) {
        return 0;
    }
    if (size <= 0) {
        return -1;
    }

    /* Xen can only handle a single dirty log region for now and we want
     * the linear framebuffer to be that region.
     * Avoid tracking any regions that are not videoram and avoid tracking
     * the legacy vga region. */
    if (mr == framebuffer && start_addr > 0xbffff) {
        goto go_physmap;
    }
    return -1;

go_physmap:
    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
            start_addr, start_addr + size);

    mr_name = memory_region_name(mr);

    physmap = g_malloc(sizeof(XenPhysmap));

    physmap->start_addr = start_addr;
    physmap->size = size;
    physmap->name = mr_name;
    physmap->phys_offset = phys_offset;

    QLIST_INSERT_HEAD(&xen_physmap, physmap, list);

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* Now when we have a physmap entry we can replace a dummy mapping with
         * a real one of guest foreign memory. */
        uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
        assert(p && p == memory_region_get_ram_ptr(mr));

        return 0;
    }

    pfn = phys_offset >> TARGET_PAGE_BITS;
    start_gpfn = start_addr >> TARGET_PAGE_BITS;
    nr_pages = size >> TARGET_PAGE_BITS;
    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, nr_pages, pfn,
                                        start_gpfn);
    if (rc) {
        int saved_errno = errno;

        error_report("relocate_memory %lu pages from GFN %"HWADDR_PRIx
                     " to GFN %"HWADDR_PRIx" failed: %s",
                     nr_pages, pfn, start_gpfn, strerror(saved_errno));
        errno = saved_errno;
        return -1;
    }

    rc = xendevicemodel_pin_memory_cacheattr(xen_dmod, xen_domid,
                                   start_addr >> TARGET_PAGE_BITS,
                                   (start_addr + size - 1) >> TARGET_PAGE_BITS,
                                   XEN_DOMCTL_MEM_CACHEATTR_WB);
    if (rc) {
        error_report("pin_memory_cacheattr failed: %s", strerror(errno));
    }
    return xen_save_physmap(state, physmap);
}

static int xen_remove_from_physmap(XenIOState *state,
                                   hwaddr start_addr,
                                   ram_addr_t size)
{
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr phys_offset = 0;

    physmap = get_physmapping(start_addr, size);
    if (physmap == NULL) {
        return -1;
    }

    phys_offset = physmap->phys_offset;
    size = physmap->size;

    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);

    size >>= TARGET_PAGE_BITS;
    start_addr >>= TARGET_PAGE_BITS;
    phys_offset >>= TARGET_PAGE_BITS;
    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, size, start_addr,
                                        phys_offset);
    if (rc) {
        int saved_errno = errno;

        error_report("relocate_memory "RAM_ADDR_FMT" pages"
                     " from GFN %"HWADDR_PRIx
                     " to GFN %"HWADDR_PRIx" failed: %s",
                     size, start_addr, phys_offset, strerror(saved_errno));
        errno = saved_errno;
        return -1;
    }

    QLIST_REMOVE(physmap, list);
    if (state->log_for_dirtybit == physmap) {
        state->log_for_dirtybit = NULL;
        g_free(state->dirty_bitmap);
        state->dirty_bitmap = NULL;
    }
    g_free(physmap);

    return 0;
}

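/*
 * Common body for the region_add/region_del listener callbacks: forward
 * the section to the ioreq server, then, for RAM sections whose VGA
 * dirty-logging state matches the operation, either add the region to
 * the physmap (or mark ROM pages read-only in Xen) or remove its
 * physmap entry again.
 */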
static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
    hvmmem_type_t mem_type;

    if (section->mr == &ram_memory) {
        return;
    } else {
        if (add) {
            xen_map_memory_section(xen_domid, state->ioservid,
                                   section);
        } else {
            xen_unmap_memory_section(xen_domid, state->ioservid,
                                     section);
        }
    }

    if (!memory_region_is_ram(section->mr)) {
        return;
    }

    if (log_dirty != add) {
        return;
    }

    trace_xen_client_set_memory(start_addr, size, log_dirty);

    start_addr &= TARGET_PAGE_MASK;
    size = TARGET_PAGE_ALIGN(size);

    if (add) {
        if (!memory_region_is_rom(section->mr)) {
            xen_add_to_physmap(state, start_addr, size,
                               section->mr, section->offset_within_region);
        } else {
            mem_type = HVMMEM_ram_ro;
            if (xen_set_mem_type(xen_domid, mem_type,
                                 start_addr >> TARGET_PAGE_BITS,
                                 size >> TARGET_PAGE_BITS)) {
                DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
                        start_addr);
            }
        }
    } else {
        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
        }
    }
}

static void xen_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

static void xen_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

static void xen_io_add(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    memory_region_ref(mr);

    xen_map_io_section(xen_domid, state->ioservid, section);
}

static void xen_io_del(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    xen_unmap_io_section(xen_domid, state->ioservid, section);

    memory_region_unref(mr);
}

static void xen_device_realize(DeviceListener *listener,
                               DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev = g_new(XenPciDevice, 1);

        xendev->pci_dev = pci_dev;
        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
                                     pci_dev->devfn);
        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);

        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    }
}

static void xen_device_unrealize(DeviceListener *listener,
                                 DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev, *next;

        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);

        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
            if (xendev->pci_dev == pci_dev) {
                QLIST_REMOVE(xendev, entry);
                g_free(xendev);
                break;
            }
        }
    }
}

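/*
 * Read Xen's dirty VRAM log for the physmap region containing
 * @start_addr and mark the corresponding framebuffer pages dirty in
 * QEMU.  Xen tracks only one dirty-log range, so the first region to
 * register here wins.
 */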
static void xen_sync_dirty_bitmap(XenIOState *state,
                                  hwaddr start_addr,
                                  ram_addr_t size)
{
    hwaddr npages = size >> TARGET_PAGE_BITS;
    const int width = sizeof(unsigned long) * 8;
    size_t bitmap_size = DIV_ROUND_UP(npages, width);
    int rc, i, j;
    const XenPhysmap *physmap = NULL;

    physmap = get_physmapping(start_addr, size);
    if (physmap == NULL) {
        /* not handled */
        return;
    }

    if (state->log_for_dirtybit == NULL) {
        state->log_for_dirtybit = physmap;
        state->dirty_bitmap = g_new(unsigned long, bitmap_size);
    } else if (state->log_for_dirtybit != physmap) {
        /* Only one range for dirty bitmap can be tracked. */
        return;
    }

    rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
                              npages, state->dirty_bitmap);
    if (rc < 0) {
#ifndef ENODATA
#define ENODATA ENOENT
#endif
        if (errno == ENODATA) {
            memory_region_set_dirty(framebuffer, 0, size);
            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
                    ", 0x" TARGET_FMT_plx "): %s\n",
                    start_addr, start_addr + size, strerror(errno));
        }
        return;
    }

    for (i = 0; i < bitmap_size; i++) {
        unsigned long map = state->dirty_bitmap[i];
        while (map != 0) {
            j = ctzl(map);
            map &= ~(1ul << j);
            memory_region_set_dirty(framebuffer,
                                    (i * width + j) * TARGET_PAGE_SIZE,
                                    TARGET_PAGE_SIZE);
        }
    }
}

static void xen_log_start(MemoryListener *listener,
                          MemoryRegionSection *section,
                          int old, int new)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
        xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                              int128_get64(section->size));
    }
}

static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
                         int old, int new)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
        state->log_for_dirtybit = NULL;
        g_free(state->dirty_bitmap);
        state->dirty_bitmap = NULL;
        /* Disable dirty bit tracking */
        xen_track_dirty_vram(xen_domid, 0, 0, NULL);
    }
}

static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                          int128_get64(section->size));
}

static void xen_log_global_start(MemoryListener *listener)
{
    if (xen_enabled()) {
        xen_in_migration = true;
    }
}

static void xen_log_global_stop(MemoryListener *listener)
{
    xen_in_migration = false;
}

static MemoryListener xen_memory_listener = {
    .region_add = xen_region_add,
    .region_del = xen_region_del,
    .log_start = xen_log_start,
    .log_stop = xen_log_stop,
    .log_sync = xen_log_sync,
    .log_global_start = xen_log_global_start,
    .log_global_stop = xen_log_global_stop,
    .priority = 10,
};

static MemoryListener xen_io_listener = {
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = 10,
};

static DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};

/* get the ioreq packets from shared memory */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        DPRINTF("I/O request not ready: "
                "%x, ptr: %x, port: %"PRIx64", "
                "data: %"PRIx64", count: %u, size: %u\n",
                req->state, req->data_is_ptr, req->addr,
                req->data, req->count, req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Use poll to get the port notification and return the pending ioreq
 * packet for the vcpu that signalled us, or NULL if there is nothing
 * to do.
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    int i;
    evtchn_port_t port;

    port = xenevtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        xenevtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}

static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    switch (size) {
        case 1:
            return cpu_inb(addr);
        case 2:
            return cpu_inw(addr);
        case 4:
            return cpu_inl(addr);
        default:
            hw_error("inp: bad size: %04x %lx", addr, size);
    }
}

static void do_outp(uint32_t addr,
        unsigned long size, uint32_t val)
{
    switch (size) {
        case 1:
            return cpu_outb(addr, val);
        case 2:
            return cpu_outw(addr, val);
        case 4:
            return cpu_outl(addr, val);
        default:
            hw_error("outp: bad size: %04x %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /* Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}
static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}


static void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                          req->size);
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}

static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}

static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
{
    uint32_t sbdf = req->addr >> 32;
    uint32_t reg = req->addr;
    XenPciDevice *xendev;

    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
        req->size != sizeof(uint32_t)) {
        hw_error("PCI config access: bad size (%u)", req->size);
    }

    if (req->count != 1) {
        hw_error("PCI config access: bad count (%u)", req->count);
    }

    QLIST_FOREACH(xendev, &state->dev_list, entry) {
        if (xendev->sbdf != sbdf) {
            continue;
        }

        if (!req->data_is_ptr) {
            if (req->dir == IOREQ_READ) {
                req->data = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, req->data);
            } else if (req->dir == IOREQ_WRITE) {
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, req->data);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->data, req->size);
            }
        } else {
            uint32_t tmp;

            if (req->dir == IOREQ_READ) {
                tmp = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, tmp);
                write_phys_req_item(req->data, req, 0, &tmp);
            } else if (req->dir == IOREQ_WRITE) {
                read_phys_req_item(req->data, req, 0, &tmp);
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, tmp);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    tmp, req->size);
            }
        }
    }
}

static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
{
    X86CPU *cpu;
    CPUX86State *env;

    cpu = X86_CPU(current_cpu);
    env = &cpu->env;
    env->regs[R_EAX] = req->data;
    env->regs[R_EBX] = vmport_regs->ebx;
    env->regs[R_ECX] = vmport_regs->ecx;
    env->regs[R_EDX] = vmport_regs->edx;
    env->regs[R_ESI] = vmport_regs->esi;
    env->regs[R_EDI] = vmport_regs->edi;
}

static void regs_from_cpu(vmware_regs_t *vmport_regs)
{
    X86CPU *cpu = X86_CPU(current_cpu);
    CPUX86State *env = &cpu->env;

    vmport_regs->ebx = env->regs[R_EBX];
    vmport_regs->ecx = env->regs[R_ECX];
    vmport_regs->edx = env->regs[R_EDX];
    vmport_regs->esi = env->regs[R_ESI];
    vmport_regs->edi = env->regs[R_EDI];
}

static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
{
    vmware_regs_t *vmport_regs;

    assert(state->shared_vmport_page);
    vmport_regs =
        &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
    QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));

    current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
    regs_to_cpu(vmport_regs, req);
    cpu_ioreq_pio(req);
    regs_from_cpu(vmport_regs);
    current_cpu = NULL;
}

static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
            (req->size < sizeof (target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    if (req->dir == IOREQ_WRITE) {
        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count, req->size);
    }

    switch (req->type) {
        case IOREQ_TYPE_PIO:
            cpu_ioreq_pio(req);
            break;
        case IOREQ_TYPE_COPY:
            cpu_ioreq_move(req);
            break;
        case IOREQ_TYPE_VMWARE_PORT:
            handle_vmport_ioreq(state, req);
            break;
        case IOREQ_TYPE_TIMEOFFSET:
            break;
        case IOREQ_TYPE_INVALIDATE:
            xen_invalidate_map_cache();
            break;
        case IOREQ_TYPE_PCI_CONFIG:
            cpu_ioreq_config(state, req);
            break;
        default:
            hw_error("Invalid ioreq type 0x%x\n", req->type);
    }
    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count, req->size);
    }
}

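/*
 * Drain the buffered ioreq ring: re-read the ring pointers with the
 * appropriate barriers, turn each slot into a regular write ioreq and
 * dispatch it through handle_ioreq().  64-bit requests span two
 * consecutive slots.
 */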
static int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    for (;;) {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        atomic_add(&buf_page->read_pointer, qw + 1);
    }

    return req.count;
}

static void handle_buffered_io(void *opaque)
{
    XenIOState *state = opaque;

    if (handle_buffered_iopage(state)) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else {
        timer_del(state->buffered_io_timer);
        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    }
}

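/*
 * Event channel handler: service any buffered ioreqs first, then handle
 * the synchronous ioreq of the notifying vcpu, write the result back to
 * the shared page and notify Xen on that vcpu's local port.
 */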
static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        if (req->state != STATE_IOREQ_INPROCESS) {
            fprintf(stderr, "Badness in I/O request ... not in service?!: "
                    "%x, ptr: %x, port: %"PRIx64", "
                    "data: %"PRIx64", count: %u, size: %u, type: %u\n",
                    req->state, req->data_is_ptr, req->addr,
                    req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            ShutdownCause request;

            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            request = qemu_reset_requested_get();
            if (request) {
                qemu_system_reset(request);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        xenevtchn_notify(state->xce_handle,
                         state->ioreq_local_port[state->send_vcpu]);
    }
}

static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != NULL) {
        evtchn_fd = xenevtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                            state);

    if (evtchn_fd != -1) {
        CPUState *cpu_state;

        DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
        CPU_FOREACH(cpu_state) {
            DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
                    __func__, cpu_state->cpu_index, cpu_state);
            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
        }
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}


static void xen_hvm_change_state_handler(void *opaque, int running,
                                         RunState rstate)
{
    XenIOState *state = opaque;

    if (running) {
        xen_main_loop_prepare(state);
    }

    xen_set_ioreq_server_state(xen_domid,
                               state->ioservid,
                               (rstate == RUN_STATE_RUNNING));
}

static void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xenevtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}

#ifdef XEN_COMPAT_PHYSMAP
static void xen_read_physmap(XenIOState *state)
{
    XenPhysmap *physmap = NULL;
    unsigned int len, num, i;
    char path[80], *value = NULL;
    char **entries = NULL;

    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap", xen_domid);
    entries = xs_directory(state->xenstore, 0, path, &num);
    if (entries == NULL) {
        return;
    }

    for (i = 0; i < num; i++) {
        physmap = g_malloc(sizeof (XenPhysmap));
        physmap->phys_offset = strtoull(entries[i], NULL, 16);
        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/start_addr",
                 xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->start_addr = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/size",
                 xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->size = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/name",
                 xen_domid, entries[i]);
        physmap->name = xs_read(state->xenstore, 0, path, &len);

        QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
    }
    free(entries);
}
#else
static void xen_read_physmap(XenIOState *state)
{
}
#endif

static void xen_wakeup_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
}

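/*
 * Map the ioreq server pages.  Newer Xen exposes them as a mappable
 * resource (bufioreq page in frame 0, synchronous ioreq page in frame 1);
 * if that is unsupported, fall back to mapping the PFNs reported by
 * xen_get_ioreq_server_info().
 */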
static int xen_map_ioreq_server(XenIOState *state)
{
    void *addr = NULL;
    xenforeignmemory_resource_handle *fres;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    int rc;

    /*
     * Attempt to map using the resource API and fall back to normal
     * foreign mapping if this is not supported.
     */
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
    fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
                                         XENMEM_resource_ioreq_server,
                                         state->ioservid, 0, 2,
                                         &addr,
                                         PROT_READ | PROT_WRITE, 0);
    if (fres != NULL) {
        trace_xen_map_resource_ioreq(state->ioservid, addr);
        state->buffered_io_page = addr;
        state->shared_page = addr + TARGET_PAGE_SIZE;
    } else if (errno != EOPNOTSUPP) {
        error_report("failed to map ioreq server resources: error %d handle=%p",
                     errno, xen_xc);
        return -1;
    }

    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                   (state->shared_page == NULL) ?
                                   &ioreq_pfn : NULL,
                                   (state->buffered_io_page == NULL) ?
                                   &bufioreq_pfn : NULL,
                                   &bufioreq_evtchn);
    if (rc < 0) {
        error_report("failed to get ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        return rc;
    }

    if (state->shared_page == NULL) {
        DPRINTF("shared page at pfn %lx\n", ioreq_pfn);

        state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                  PROT_READ | PROT_WRITE,
                                                  1, &ioreq_pfn, NULL);
        if (state->shared_page == NULL) {
            error_report("map shared IO page returned error %d handle=%p",
                         errno, xen_xc);
        }
    }

    if (state->buffered_io_page == NULL) {
        DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);

        state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                       PROT_READ | PROT_WRITE,
                                                       1, &bufioreq_pfn,
                                                       NULL);
        if (state->buffered_io_page == NULL) {
            error_report("map buffered IO page returned error %d", errno);
            return -1;
        }
    }

    if (state->shared_page == NULL || state->buffered_io_page == NULL) {
        return -1;
    }

    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);

    state->bufioreq_remote_port = bufioreq_evtchn;

    return 0;
}

void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
{
    MachineState *ms = MACHINE(pcms);
    unsigned int max_cpus = ms->smp.max_cpus;
    int i, rc;
    xen_pfn_t ioreq_pfn;
    XenIOState *state;

    state = g_malloc0(sizeof (XenIOState));

    state->xce_handle = xenevtchn_open(NULL, 0);
    if (state->xce_handle == NULL) {
        perror("xen: event channel open");
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        perror("xen: xenstore open");
        goto err;
    }

    xen_create_ioreq_server(xen_domid, &state->ioservid);

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    state->suspend.notify = xen_suspend_notifier;
    qemu_register_suspend_notifier(&state->suspend);

    state->wakeup.notify = xen_wakeup_notifier;
    qemu_register_wakeup_notifier(&state->wakeup);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    rc = xen_map_ioreq_server(state);
    if (rc < 0) {
        goto err;
    }

    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
    if (!rc) {
        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
        state->shared_vmport_page =
            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
                                 1, &ioreq_pfn, NULL);
        if (state->shared_vmport_page == NULL) {
            error_report("map shared vmport IO page returned error %d handle=%p",
                         errno, xen_xc);
            goto err;
        }
    } else if (rc != -ENOSYS) {
        error_report("get vmport regs pfn returned error %d, rc=%d",
                     errno, rc);
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));

    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));

    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                        xen_vcpu_eport(state->shared_page, i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                    state->bufioreq_remote_port);
    if (rc == -1) {
        error_report("buffered evtchn bind error %d", errno);
        goto err;
    }
    state->bufioreq_local_port = rc;

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif
    xen_ram_init(pcms, ram_size, ram_memory);

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);
    state->log_for_dirtybit = NULL;

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    xen_bus_init();

    /* Initialize backend core & drivers */
    if (xen_be_init() != 0) {
        error_report("xen backend core setup failed");
        goto err;
    }
    xen_be_register_common();

    QLIST_INIT(&xen_physmap);
    xen_read_physmap(state);

    /* Disable ACPI build because Xen handles it */
    pcms->acpi_build_enabled = false;

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}

"reboot" : "poweroff", 1548 sts, strerror(errno)); 1549 } else { 1550 fprintf(stderr, "Issued domain %d %s\n", xen_domid, 1551 reboot ? "reboot" : "poweroff"); 1552 } 1553 xc_interface_close(xc_handle); 1554 } 1555 } 1556 1557 void xen_register_framebuffer(MemoryRegion *mr) 1558 { 1559 framebuffer = mr; 1560 } 1561 1562 void xen_shutdown_fatal_error(const char *fmt, ...) 1563 { 1564 va_list ap; 1565 1566 va_start(ap, fmt); 1567 vfprintf(stderr, fmt, ap); 1568 va_end(ap); 1569 fprintf(stderr, "Will destroy the domain.\n"); 1570 /* destroy the domain */ 1571 qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); 1572 } 1573 1574 void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) 1575 { 1576 if (unlikely(xen_in_migration)) { 1577 int rc; 1578 ram_addr_t start_pfn, nb_pages; 1579 1580 start = xen_phys_offset_to_gaddr(start, length); 1581 1582 if (length == 0) { 1583 length = TARGET_PAGE_SIZE; 1584 } 1585 start_pfn = start >> TARGET_PAGE_BITS; 1586 nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS) 1587 - start_pfn; 1588 rc = xen_modified_memory(xen_domid, start_pfn, nb_pages); 1589 if (rc) { 1590 fprintf(stderr, 1591 "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n", 1592 __func__, start, nb_pages, errno, strerror(errno)); 1593 } 1594 } 1595 } 1596 1597 void qmp_xen_set_global_dirty_log(bool enable, Error **errp) 1598 { 1599 if (enable) { 1600 memory_global_dirty_log_start(); 1601 } else { 1602 memory_global_dirty_log_stop(); 1603 } 1604 } 1605