xref: /openbmc/qemu/hw/i386/xen/xen-hvm.c (revision 6a0acfff)
1 /*
2  * Copyright (C) 2010       Citrix Ltd.
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2.  See
5  * the COPYING file in the top-level directory.
6  *
7  * Contributions after 2012-01-13 are licensed under the terms of the
8  * GNU GPL, version 2 or (at your option) any later version.
9  */
10 
11 #include "qemu/osdep.h"
12 
13 #include "cpu.h"
14 #include "hw/pci/pci.h"
15 #include "hw/pci/pci_host.h"
16 #include "hw/i386/pc.h"
17 #include "hw/irq.h"
18 #include "hw/i386/apic-msidef.h"
19 #include "hw/xen/xen_common.h"
20 #include "hw/xen/xen-legacy-backend.h"
21 #include "hw/xen/xen-bus.h"
22 #include "qapi/error.h"
23 #include "qapi/qapi-commands-misc.h"
24 #include "qemu/error-report.h"
25 #include "qemu/range.h"
26 #include "sysemu/xen-mapcache.h"
27 #include "trace.h"
28 #include "exec/address-spaces.h"
29 
30 #include <xen/hvm/ioreq.h>
31 #include <xen/hvm/e820.h>
32 
/* Uncomment to make DPRINTF() print to stderr with a "xen: " prefix. */
//#define DEBUG_XEN_HVM

#ifdef DEBUG_XEN_HVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
#else
/* Debugging disabled: DPRINTF() expands to an empty statement. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
42 
/*
 * ram_memory is the "xen.ram" backing region created by xen_ram_init();
 * ram_640k, ram_lo and ram_hi are aliases of it mapped into system memory.
 */
static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
/* Region that xen_add_to_physmap() accepts and xen_sync_dirty_bitmap() marks dirty. */
static MemoryRegion *framebuffer;
/* Set by xen_log_global_start(), cleared by xen_log_global_stop(). */
static bool xen_in_migration;
46 
47 /* Compatibility with older version */
48 
/* This allows QEMU to build on a system that has Xen 4.5 or earlier
 * installed.  This is here (not in hw/xen/xen_common.h) because
 * xen/hvm/ioreq.h needs to be included before this block and
 * hw/xen/xen_common.h needs to be included before xen/hvm/ioreq.h.
 */
/* Fallback definitions, only used when the Xen headers lack IOREQ_TYPE_VMWARE_PORT. */
#ifndef IOREQ_TYPE_VMWARE_PORT
#define IOREQ_TYPE_VMWARE_PORT  3
/* Register values exchanged through the shared vmport page. */
struct vmware_regs {
    uint32_t esi;
    uint32_t edi;
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
};
typedef struct vmware_regs vmware_regs_t;

/* Layout of the shared vmport page: an array of register sets. */
struct shared_vmport_iopage {
    struct vmware_regs vcpu_vmport_regs[1];
};
typedef struct shared_vmport_iopage shared_vmport_iopage_t;
#endif
70 
71 static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
72 {
73     return shared_page->vcpu_ioreq[i].vp_eport;
74 }
75 static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
76 {
77     return &shared_page->vcpu_ioreq[vcpu];
78 }
79 
/*
 * Delay added to the current QEMU_CLOCK_REALTIME millisecond reading when
 * cpu_get_ioreq() re-arms the buffered I/O timer.
 */
#define BUFFER_IO_MAX_DELAY  100
81 
/* One entry per region recorded by xen_add_to_physmap(). */
typedef struct XenPhysmap {
    hwaddr start_addr;      /* guest address the region was mapped at */
    ram_addr_t size;        /* length of the region in bytes */
    const char *name;       /* result of memory_region_name() for the region */
    hwaddr phys_offset;     /* memory_region_get_ram_addr() of the region */

    QLIST_ENTRY(XenPhysmap) list;
} XenPhysmap;

/* All currently recorded physmap entries. */
static QLIST_HEAD(, XenPhysmap) xen_physmap;
92 
/* A PCI device registered by xen_device_realize(), kept on XenIOState.dev_list. */
typedef struct XenPciDevice {
    PCIDevice *pci_dev;
    uint32_t sbdf;          /* PCI_BUILD_BDF(bus number, devfn) of pci_dev */
    QLIST_ENTRY(XenPciDevice) entry;
} XenPciDevice;
98 
/* State shared by the memory/IO/device listeners and the ioreq handlers. */
typedef struct XenIOState {
    ioservid_t ioservid;
    /* per-vcpu ioreq slots, read by cpu_get_ioreq_from_shared_memory() */
    shared_iopage_t *shared_page;
    shared_vmport_iopage_t *shared_vmport_page;
    buffered_iopage_t *buffered_io_page;
    /* re-armed by cpu_get_ioreq() when the buffered-io port fires */
    QEMUTimer *buffered_io_timer;
    CPUState **cpu_by_vcpu_id;
    /* the evtchn ports polled for notification, indexed by vcpu */
    evtchn_port_t *ioreq_local_port;
    /* evtchn remote and local ports for buffered io */
    evtchn_port_t bufioreq_remote_port;
    evtchn_port_t bufioreq_local_port;
    /* the evtchn fd for polling */
    xenevtchn_handle *xce_handle;
    /* which vcpu we are serving */
    int send_vcpu;

    struct xs_handle *xenstore;
    MemoryListener memory_listener;
    MemoryListener io_listener;
    /* XenPciDevice entries added by xen_device_realize() */
    QLIST_HEAD(, XenPciDevice) dev_list;
    DeviceListener device_listener;
    hwaddr free_phys_offset;
    /* the single physmap entry whose dirty bits are being tracked, or NULL */
    const XenPhysmap *log_for_dirtybit;
    /* Buffer used by xen_sync_dirty_bitmap */
    unsigned long *dirty_bitmap;

    Notifier exit;
    Notifier suspend;
    Notifier wakeup;
} XenIOState;
130 
131 /* Xen specific function for piix pci */
132 
133 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
134 {
135     return irq_num + ((pci_dev->devfn >> 3) << 2);
136 }
137 
138 void xen_piix3_set_irq(void *opaque, int irq_num, int level)
139 {
140     xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
141                            irq_num & 3, level);
142 }
143 
144 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
145 {
146     int i;
147 
148     /* Scan for updates to PCI link routes (0x60-0x63). */
149     for (i = 0; i < len; i++) {
150         uint8_t v = (val >> (8 * i)) & 0xff;
151         if (v & 0x80) {
152             v = 0;
153         }
154         v &= 0xf;
155         if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
156             xen_set_pci_link_route(xen_domid, address + i - 0x60, v);
157         }
158     }
159 }
160 
161 int xen_is_pirq_msi(uint32_t msi_data)
162 {
163     /* If vector is 0, the msi is remapped into a pirq, passed as
164      * dest_id.
165      */
166     return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
167 }
168 
169 void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
170 {
171     xen_inject_msi(xen_domid, addr, data);
172 }
173 
174 static void xen_suspend_notifier(Notifier *notifier, void *data)
175 {
176     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
177 }
178 
179 /* Xen Interrupt Controller */
180 
181 static void xen_set_irq(void *opaque, int irq, int level)
182 {
183     xen_set_isa_irq_level(xen_domid, irq, level);
184 }
185 
186 qemu_irq *xen_interrupt_controller_init(void)
187 {
188     return qemu_allocate_irqs(xen_set_irq, NULL, 16);
189 }
190 
191 /* Memory Ops */
192 
193 static void xen_ram_init(PCMachineState *pcms,
194                          ram_addr_t ram_size, MemoryRegion **ram_memory_p)
195 {
196     MemoryRegion *sysmem = get_system_memory();
197     ram_addr_t block_len;
198     uint64_t user_lowmem = object_property_get_uint(qdev_get_machine(),
199                                                     PC_MACHINE_MAX_RAM_BELOW_4G,
200                                                     &error_abort);
201 
202     /* Handle the machine opt max-ram-below-4g.  It is basically doing
203      * min(xen limit, user limit).
204      */
205     if (!user_lowmem) {
206         user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
207     }
208     if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
209         user_lowmem = HVM_BELOW_4G_RAM_END;
210     }
211 
212     if (ram_size >= user_lowmem) {
213         pcms->above_4g_mem_size = ram_size - user_lowmem;
214         pcms->below_4g_mem_size = user_lowmem;
215     } else {
216         pcms->above_4g_mem_size = 0;
217         pcms->below_4g_mem_size = ram_size;
218     }
219     if (!pcms->above_4g_mem_size) {
220         block_len = ram_size;
221     } else {
222         /*
223          * Xen does not allocate the memory continuously, it keeps a
224          * hole of the size computed above or passed in.
225          */
226         block_len = (1ULL << 32) + pcms->above_4g_mem_size;
227     }
228     memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
229                            &error_fatal);
230     *ram_memory_p = &ram_memory;
231 
232     memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
233                              &ram_memory, 0, 0xa0000);
234     memory_region_add_subregion(sysmem, 0, &ram_640k);
235     /* Skip of the VGA IO memory space, it will be registered later by the VGA
236      * emulated device.
237      *
238      * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
239      * the Options ROM, so it is registered here as RAM.
240      */
241     memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
242                              &ram_memory, 0xc0000,
243                              pcms->below_4g_mem_size - 0xc0000);
244     memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
245     if (pcms->above_4g_mem_size > 0) {
246         memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
247                                  &ram_memory, 0x100000000ULL,
248                                  pcms->above_4g_mem_size);
249         memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
250     }
251 }
252 
253 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
254                    Error **errp)
255 {
256     unsigned long nr_pfn;
257     xen_pfn_t *pfn_list;
258     int i;
259 
260     if (runstate_check(RUN_STATE_INMIGRATE)) {
261         /* RAM already populated in Xen */
262         fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
263                 " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
264                 __func__, size, ram_addr);
265         return;
266     }
267 
268     if (mr == &ram_memory) {
269         return;
270     }
271 
272     trace_xen_ram_alloc(ram_addr, size);
273 
274     nr_pfn = size >> TARGET_PAGE_BITS;
275     pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);
276 
277     for (i = 0; i < nr_pfn; i++) {
278         pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
279     }
280 
281     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
282         error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
283                    ram_addr);
284     }
285 
286     g_free(pfn_list);
287 }
288 
289 static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size)
290 {
291     XenPhysmap *physmap = NULL;
292 
293     start_addr &= TARGET_PAGE_MASK;
294 
295     QLIST_FOREACH(physmap, &xen_physmap, list) {
296         if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
297             return physmap;
298         }
299     }
300     return NULL;
301 }
302 
303 static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size)
304 {
305     hwaddr addr = phys_offset & TARGET_PAGE_MASK;
306     XenPhysmap *physmap = NULL;
307 
308     QLIST_FOREACH(physmap, &xen_physmap, list) {
309         if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
310             return physmap->start_addr + (phys_offset - physmap->phys_offset);
311         }
312     }
313 
314     return phys_offset;
315 }
316 
317 #ifdef XEN_COMPAT_PHYSMAP
318 static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
319 {
320     char path[80], value[17];
321 
322     snprintf(path, sizeof(path),
323             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
324             xen_domid, (uint64_t)physmap->phys_offset);
325     snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->start_addr);
326     if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
327         return -1;
328     }
329     snprintf(path, sizeof(path),
330             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
331             xen_domid, (uint64_t)physmap->phys_offset);
332     snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->size);
333     if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
334         return -1;
335     }
336     if (physmap->name) {
337         snprintf(path, sizeof(path),
338                 "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
339                 xen_domid, (uint64_t)physmap->phys_offset);
340         if (!xs_write(state->xenstore, 0, path,
341                       physmap->name, strlen(physmap->name))) {
342             return -1;
343         }
344     }
345     return 0;
346 }
347 #else
348 static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
349 {
350     return 0;
351 }
352 #endif
353 
354 static int xen_add_to_physmap(XenIOState *state,
355                               hwaddr start_addr,
356                               ram_addr_t size,
357                               MemoryRegion *mr,
358                               hwaddr offset_within_region)
359 {
360     unsigned long nr_pages;
361     int rc = 0;
362     XenPhysmap *physmap = NULL;
363     hwaddr pfn, start_gpfn;
364     hwaddr phys_offset = memory_region_get_ram_addr(mr);
365     const char *mr_name;
366 
367     if (get_physmapping(start_addr, size)) {
368         return 0;
369     }
370     if (size <= 0) {
371         return -1;
372     }
373 
374     /* Xen can only handle a single dirty log region for now and we want
375      * the linear framebuffer to be that region.
376      * Avoid tracking any regions that is not videoram and avoid tracking
377      * the legacy vga region. */
378     if (mr == framebuffer && start_addr > 0xbffff) {
379         goto go_physmap;
380     }
381     return -1;
382 
383 go_physmap:
384     DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
385             start_addr, start_addr + size);
386 
387     mr_name = memory_region_name(mr);
388 
389     physmap = g_malloc(sizeof(XenPhysmap));
390 
391     physmap->start_addr = start_addr;
392     physmap->size = size;
393     physmap->name = mr_name;
394     physmap->phys_offset = phys_offset;
395 
396     QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
397 
398     if (runstate_check(RUN_STATE_INMIGRATE)) {
399         /* Now when we have a physmap entry we can replace a dummy mapping with
400          * a real one of guest foreign memory. */
401         uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
402         assert(p && p == memory_region_get_ram_ptr(mr));
403 
404         return 0;
405     }
406 
407     pfn = phys_offset >> TARGET_PAGE_BITS;
408     start_gpfn = start_addr >> TARGET_PAGE_BITS;
409     nr_pages = size >> TARGET_PAGE_BITS;
410     rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, nr_pages, pfn,
411                                         start_gpfn);
412     if (rc) {
413         int saved_errno = errno;
414 
415         error_report("relocate_memory %lu pages from GFN %"HWADDR_PRIx
416                      " to GFN %"HWADDR_PRIx" failed: %s",
417                      nr_pages, pfn, start_gpfn, strerror(saved_errno));
418         errno = saved_errno;
419         return -1;
420     }
421 
422     rc = xendevicemodel_pin_memory_cacheattr(xen_dmod, xen_domid,
423                                    start_addr >> TARGET_PAGE_BITS,
424                                    (start_addr + size - 1) >> TARGET_PAGE_BITS,
425                                    XEN_DOMCTL_MEM_CACHEATTR_WB);
426     if (rc) {
427         error_report("pin_memory_cacheattr failed: %s", strerror(errno));
428     }
429     return xen_save_physmap(state, physmap);
430 }
431 
432 static int xen_remove_from_physmap(XenIOState *state,
433                                    hwaddr start_addr,
434                                    ram_addr_t size)
435 {
436     int rc = 0;
437     XenPhysmap *physmap = NULL;
438     hwaddr phys_offset = 0;
439 
440     physmap = get_physmapping(start_addr, size);
441     if (physmap == NULL) {
442         return -1;
443     }
444 
445     phys_offset = physmap->phys_offset;
446     size = physmap->size;
447 
448     DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
449             "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);
450 
451     size >>= TARGET_PAGE_BITS;
452     start_addr >>= TARGET_PAGE_BITS;
453     phys_offset >>= TARGET_PAGE_BITS;
454     rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, size, start_addr,
455                                         phys_offset);
456     if (rc) {
457         int saved_errno = errno;
458 
459         error_report("relocate_memory "RAM_ADDR_FMT" pages"
460                      " from GFN %"HWADDR_PRIx
461                      " to GFN %"HWADDR_PRIx" failed: %s",
462                      size, start_addr, phys_offset, strerror(saved_errno));
463         errno = saved_errno;
464         return -1;
465     }
466 
467     QLIST_REMOVE(physmap, list);
468     if (state->log_for_dirtybit == physmap) {
469         state->log_for_dirtybit = NULL;
470         g_free(state->dirty_bitmap);
471         state->dirty_bitmap = NULL;
472     }
473     g_free(physmap);
474 
475     return 0;
476 }
477 
478 static void xen_set_memory(struct MemoryListener *listener,
479                            MemoryRegionSection *section,
480                            bool add)
481 {
482     XenIOState *state = container_of(listener, XenIOState, memory_listener);
483     hwaddr start_addr = section->offset_within_address_space;
484     ram_addr_t size = int128_get64(section->size);
485     bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
486     hvmmem_type_t mem_type;
487 
488     if (section->mr == &ram_memory) {
489         return;
490     } else {
491         if (add) {
492             xen_map_memory_section(xen_domid, state->ioservid,
493                                    section);
494         } else {
495             xen_unmap_memory_section(xen_domid, state->ioservid,
496                                      section);
497         }
498     }
499 
500     if (!memory_region_is_ram(section->mr)) {
501         return;
502     }
503 
504     if (log_dirty != add) {
505         return;
506     }
507 
508     trace_xen_client_set_memory(start_addr, size, log_dirty);
509 
510     start_addr &= TARGET_PAGE_MASK;
511     size = TARGET_PAGE_ALIGN(size);
512 
513     if (add) {
514         if (!memory_region_is_rom(section->mr)) {
515             xen_add_to_physmap(state, start_addr, size,
516                                section->mr, section->offset_within_region);
517         } else {
518             mem_type = HVMMEM_ram_ro;
519             if (xen_set_mem_type(xen_domid, mem_type,
520                                  start_addr >> TARGET_PAGE_BITS,
521                                  size >> TARGET_PAGE_BITS)) {
522                 DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
523                         start_addr);
524             }
525         }
526     } else {
527         if (xen_remove_from_physmap(state, start_addr, size) < 0) {
528             DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
529         }
530     }
531 }
532 
533 static void xen_region_add(MemoryListener *listener,
534                            MemoryRegionSection *section)
535 {
536     memory_region_ref(section->mr);
537     xen_set_memory(listener, section, true);
538 }
539 
540 static void xen_region_del(MemoryListener *listener,
541                            MemoryRegionSection *section)
542 {
543     xen_set_memory(listener, section, false);
544     memory_region_unref(section->mr);
545 }
546 
547 static void xen_io_add(MemoryListener *listener,
548                        MemoryRegionSection *section)
549 {
550     XenIOState *state = container_of(listener, XenIOState, io_listener);
551     MemoryRegion *mr = section->mr;
552 
553     if (mr->ops == &unassigned_io_ops) {
554         return;
555     }
556 
557     memory_region_ref(mr);
558 
559     xen_map_io_section(xen_domid, state->ioservid, section);
560 }
561 
562 static void xen_io_del(MemoryListener *listener,
563                        MemoryRegionSection *section)
564 {
565     XenIOState *state = container_of(listener, XenIOState, io_listener);
566     MemoryRegion *mr = section->mr;
567 
568     if (mr->ops == &unassigned_io_ops) {
569         return;
570     }
571 
572     xen_unmap_io_section(xen_domid, state->ioservid, section);
573 
574     memory_region_unref(mr);
575 }
576 
577 static void xen_device_realize(DeviceListener *listener,
578                                DeviceState *dev)
579 {
580     XenIOState *state = container_of(listener, XenIOState, device_listener);
581 
582     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
583         PCIDevice *pci_dev = PCI_DEVICE(dev);
584         XenPciDevice *xendev = g_new(XenPciDevice, 1);
585 
586         xendev->pci_dev = pci_dev;
587         xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
588                                      pci_dev->devfn);
589         QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
590 
591         xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
592     }
593 }
594 
595 static void xen_device_unrealize(DeviceListener *listener,
596                                  DeviceState *dev)
597 {
598     XenIOState *state = container_of(listener, XenIOState, device_listener);
599 
600     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
601         PCIDevice *pci_dev = PCI_DEVICE(dev);
602         XenPciDevice *xendev, *next;
603 
604         xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
605 
606         QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
607             if (xendev->pci_dev == pci_dev) {
608                 QLIST_REMOVE(xendev, entry);
609                 g_free(xendev);
610                 break;
611             }
612         }
613     }
614 }
615 
616 static void xen_sync_dirty_bitmap(XenIOState *state,
617                                   hwaddr start_addr,
618                                   ram_addr_t size)
619 {
620     hwaddr npages = size >> TARGET_PAGE_BITS;
621     const int width = sizeof(unsigned long) * 8;
622     size_t bitmap_size = DIV_ROUND_UP(npages, width);
623     int rc, i, j;
624     const XenPhysmap *physmap = NULL;
625 
626     physmap = get_physmapping(start_addr, size);
627     if (physmap == NULL) {
628         /* not handled */
629         return;
630     }
631 
632     if (state->log_for_dirtybit == NULL) {
633         state->log_for_dirtybit = physmap;
634         state->dirty_bitmap = g_new(unsigned long, bitmap_size);
635     } else if (state->log_for_dirtybit != physmap) {
636         /* Only one range for dirty bitmap can be tracked. */
637         return;
638     }
639 
640     rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
641                               npages, state->dirty_bitmap);
642     if (rc < 0) {
643 #ifndef ENODATA
644 #define ENODATA  ENOENT
645 #endif
646         if (errno == ENODATA) {
647             memory_region_set_dirty(framebuffer, 0, size);
648             DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
649                     ", 0x" TARGET_FMT_plx "): %s\n",
650                     start_addr, start_addr + size, strerror(errno));
651         }
652         return;
653     }
654 
655     for (i = 0; i < bitmap_size; i++) {
656         unsigned long map = state->dirty_bitmap[i];
657         while (map != 0) {
658             j = ctzl(map);
659             map &= ~(1ul << j);
660             memory_region_set_dirty(framebuffer,
661                                     (i * width + j) * TARGET_PAGE_SIZE,
662                                     TARGET_PAGE_SIZE);
663         };
664     }
665 }
666 
667 static void xen_log_start(MemoryListener *listener,
668                           MemoryRegionSection *section,
669                           int old, int new)
670 {
671     XenIOState *state = container_of(listener, XenIOState, memory_listener);
672 
673     if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
674         xen_sync_dirty_bitmap(state, section->offset_within_address_space,
675                               int128_get64(section->size));
676     }
677 }
678 
679 static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
680                          int old, int new)
681 {
682     XenIOState *state = container_of(listener, XenIOState, memory_listener);
683 
684     if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
685         state->log_for_dirtybit = NULL;
686         g_free(state->dirty_bitmap);
687         state->dirty_bitmap = NULL;
688         /* Disable dirty bit tracking */
689         xen_track_dirty_vram(xen_domid, 0, 0, NULL);
690     }
691 }
692 
693 static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
694 {
695     XenIOState *state = container_of(listener, XenIOState, memory_listener);
696 
697     xen_sync_dirty_bitmap(state, section->offset_within_address_space,
698                           int128_get64(section->size));
699 }
700 
701 static void xen_log_global_start(MemoryListener *listener)
702 {
703     if (xen_enabled()) {
704         xen_in_migration = true;
705     }
706 }
707 
708 static void xen_log_global_stop(MemoryListener *listener)
709 {
710     xen_in_migration = false;
711 }
712 
/* Callbacks for memory sections: region tracking plus VGA dirty logging. */
static MemoryListener xen_memory_listener = {
    .region_add = xen_region_add,
    .region_del = xen_region_del,
    .log_start = xen_log_start,
    .log_stop = xen_log_stop,
    .log_sync = xen_log_sync,
    .log_global_start = xen_log_global_start,
    .log_global_stop = xen_log_global_stop,
    .priority = 10,
};
723 
/* Callbacks for I/O sections: register/unregister them with the ioreq server. */
static MemoryListener xen_io_listener = {
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = 10,
};
729 
/* Callbacks that track PCI devices as they are realized and unrealized. */
static DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};
734 
735 /* get the ioreq packets from share mem */
736 static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
737 {
738     ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
739 
740     if (req->state != STATE_IOREQ_READY) {
741         DPRINTF("I/O request not ready: "
742                 "%x, ptr: %x, port: %"PRIx64", "
743                 "data: %"PRIx64", count: %u, size: %u\n",
744                 req->state, req->data_is_ptr, req->addr,
745                 req->data, req->count, req->size);
746         return NULL;
747     }
748 
749     xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
750 
751     req->state = STATE_IOREQ_INPROCESS;
752     return req;
753 }
754 
755 /* use poll to get the port notification */
756 /* ioreq_vec--out,the */
757 /* retval--the number of ioreq packet */
758 static ioreq_t *cpu_get_ioreq(XenIOState *state)
759 {
760     MachineState *ms = MACHINE(qdev_get_machine());
761     unsigned int max_cpus = ms->smp.max_cpus;
762     int i;
763     evtchn_port_t port;
764 
765     port = xenevtchn_pending(state->xce_handle);
766     if (port == state->bufioreq_local_port) {
767         timer_mod(state->buffered_io_timer,
768                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
769         return NULL;
770     }
771 
772     if (port != -1) {
773         for (i = 0; i < max_cpus; i++) {
774             if (state->ioreq_local_port[i] == port) {
775                 break;
776             }
777         }
778 
779         if (i == max_cpus) {
780             hw_error("Fatal error while trying to get io event!\n");
781         }
782 
783         /* unmask the wanted port again */
784         xenevtchn_unmask(state->xce_handle, port);
785 
786         /* get the io packet from shared memory */
787         state->send_vcpu = i;
788         return cpu_get_ioreq_from_shared_memory(state, i);
789     }
790 
791     /* read error or read nothing */
792     return NULL;
793 }
794 
/*
 * Read @size bytes (1, 2 or 4) from I/O port @addr and return the value.
 * Any other size is reported through hw_error().
 */
static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    if (size == 1) {
        return cpu_inb(addr);
    }
    if (size == 2) {
        return cpu_inw(addr);
    }
    if (size == 4) {
        return cpu_inl(addr);
    }
    hw_error("inp: bad size: %04x %lx", addr, size);
}
808 
/*
 * Write the low @size bytes (1, 2 or 4) of @val to I/O port @addr.
 * Any other size is reported through hw_error().
 */
static void do_outp(uint32_t addr,
        unsigned long size, uint32_t val)
{
    /*
     * Plain calls followed by break: ISO C does not allow
     * "return <expression>;" in a function returning void.
     */
    switch (size) {
        case 1:
            cpu_outb(addr, val);
            break;
        case 2:
            cpu_outw(addr, val);
            break;
        case 4:
            cpu_outl(addr, val);
            break;
        default:
            hw_error("outp: bad size: %04x %lx", addr, size);
    }
}
823 
824 /*
825  * Helper functions which read/write an object from/to physical guest
826  * memory, as part of the implementation of an ioreq.
827  *
828  * Equivalent to
829  *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
830  *                          val, req->size, 0/1)
831  * except without the integer overflow problems.
832  */
833 static void rw_phys_req_item(hwaddr addr,
834                              ioreq_t *req, uint32_t i, void *val, int rw)
835 {
836     /* Do everything unsigned so overflow just results in a truncated result
837      * and accesses to undesired parts of guest memory, which is up
838      * to the guest */
839     hwaddr offset = (hwaddr)req->size * i;
840     if (req->df) {
841         addr -= offset;
842     } else {
843         addr += offset;
844     }
845     cpu_physical_memory_rw(addr, val, req->size, rw);
846 }
847 
848 static inline void read_phys_req_item(hwaddr addr,
849                                       ioreq_t *req, uint32_t i, void *val)
850 {
851     rw_phys_req_item(addr, req, i, val, 0);
852 }
853 static inline void write_phys_req_item(hwaddr addr,
854                                        ioreq_t *req, uint32_t i, void *val)
855 {
856     rw_phys_req_item(addr, req, i, val, 1);
857 }
858 
859 
860 static void cpu_ioreq_pio(ioreq_t *req)
861 {
862     uint32_t i;
863 
864     trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
865                          req->data, req->count, req->size);
866 
867     if (req->size > sizeof(uint32_t)) {
868         hw_error("PIO: bad size (%u)", req->size);
869     }
870 
871     if (req->dir == IOREQ_READ) {
872         if (!req->data_is_ptr) {
873             req->data = do_inp(req->addr, req->size);
874             trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
875                                          req->size);
876         } else {
877             uint32_t tmp;
878 
879             for (i = 0; i < req->count; i++) {
880                 tmp = do_inp(req->addr, req->size);
881                 write_phys_req_item(req->data, req, i, &tmp);
882             }
883         }
884     } else if (req->dir == IOREQ_WRITE) {
885         if (!req->data_is_ptr) {
886             trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
887                                           req->size);
888             do_outp(req->addr, req->size, req->data);
889         } else {
890             for (i = 0; i < req->count; i++) {
891                 uint32_t tmp = 0;
892 
893                 read_phys_req_item(req->data, req, i, &tmp);
894                 do_outp(req->addr, req->size, tmp);
895             }
896         }
897     }
898 }
899 
900 static void cpu_ioreq_move(ioreq_t *req)
901 {
902     uint32_t i;
903 
904     trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
905                          req->data, req->count, req->size);
906 
907     if (req->size > sizeof(req->data)) {
908         hw_error("MMIO: bad size (%u)", req->size);
909     }
910 
911     if (!req->data_is_ptr) {
912         if (req->dir == IOREQ_READ) {
913             for (i = 0; i < req->count; i++) {
914                 read_phys_req_item(req->addr, req, i, &req->data);
915             }
916         } else if (req->dir == IOREQ_WRITE) {
917             for (i = 0; i < req->count; i++) {
918                 write_phys_req_item(req->addr, req, i, &req->data);
919             }
920         }
921     } else {
922         uint64_t tmp;
923 
924         if (req->dir == IOREQ_READ) {
925             for (i = 0; i < req->count; i++) {
926                 read_phys_req_item(req->addr, req, i, &tmp);
927                 write_phys_req_item(req->data, req, i, &tmp);
928             }
929         } else if (req->dir == IOREQ_WRITE) {
930             for (i = 0; i < req->count; i++) {
931                 read_phys_req_item(req->data, req, i, &tmp);
932                 write_phys_req_item(req->addr, req, i, &tmp);
933             }
934         }
935     }
936 }
937 
938 static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
939 {
940     uint32_t sbdf = req->addr >> 32;
941     uint32_t reg = req->addr;
942     XenPciDevice *xendev;
943 
944     if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
945         req->size != sizeof(uint32_t)) {
946         hw_error("PCI config access: bad size (%u)", req->size);
947     }
948 
949     if (req->count != 1) {
950         hw_error("PCI config access: bad count (%u)", req->count);
951     }
952 
953     QLIST_FOREACH(xendev, &state->dev_list, entry) {
954         if (xendev->sbdf != sbdf) {
955             continue;
956         }
957 
958         if (!req->data_is_ptr) {
959             if (req->dir == IOREQ_READ) {
960                 req->data = pci_host_config_read_common(
961                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
962                     req->size);
963                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
964                                             req->size, req->data);
965             } else if (req->dir == IOREQ_WRITE) {
966                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
967                                              req->size, req->data);
968                 pci_host_config_write_common(
969                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
970                     req->data, req->size);
971             }
972         } else {
973             uint32_t tmp;
974 
975             if (req->dir == IOREQ_READ) {
976                 tmp = pci_host_config_read_common(
977                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
978                     req->size);
979                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
980                                             req->size, tmp);
981                 write_phys_req_item(req->data, req, 0, &tmp);
982             } else if (req->dir == IOREQ_WRITE) {
983                 read_phys_req_item(req->data, req, 0, &tmp);
984                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
985                                              req->size, tmp);
986                 pci_host_config_write_common(
987                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
988                     tmp, req->size);
989             }
990         }
991     }
992 }
993 
994 static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
995 {
996     X86CPU *cpu;
997     CPUX86State *env;
998 
999     cpu = X86_CPU(current_cpu);
1000     env = &cpu->env;
1001     env->regs[R_EAX] = req->data;
1002     env->regs[R_EBX] = vmport_regs->ebx;
1003     env->regs[R_ECX] = vmport_regs->ecx;
1004     env->regs[R_EDX] = vmport_regs->edx;
1005     env->regs[R_ESI] = vmport_regs->esi;
1006     env->regs[R_EDI] = vmport_regs->edi;
1007 }
1008 
1009 static void regs_from_cpu(vmware_regs_t *vmport_regs)
1010 {
1011     X86CPU *cpu = X86_CPU(current_cpu);
1012     CPUX86State *env = &cpu->env;
1013 
1014     vmport_regs->ebx = env->regs[R_EBX];
1015     vmport_regs->ecx = env->regs[R_ECX];
1016     vmport_regs->edx = env->regs[R_EDX];
1017     vmport_regs->esi = env->regs[R_ESI];
1018     vmport_regs->edi = env->regs[R_EDI];
1019 }
1020 
1021 static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
1022 {
1023     vmware_regs_t *vmport_regs;
1024 
1025     assert(state->shared_vmport_page);
1026     vmport_regs =
1027         &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
1028     QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));
1029 
1030     current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
1031     regs_to_cpu(vmport_regs, req);
1032     cpu_ioreq_pio(req);
1033     regs_from_cpu(vmport_regs);
1034     current_cpu = NULL;
1035 }
1036 
1037 static void handle_ioreq(XenIOState *state, ioreq_t *req)
1038 {
1039     trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
1040                        req->addr, req->data, req->count, req->size);
1041 
1042     if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
1043             (req->size < sizeof (target_ulong))) {
1044         req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
1045     }
1046 
1047     if (req->dir == IOREQ_WRITE)
1048         trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
1049                                  req->addr, req->data, req->count, req->size);
1050 
1051     switch (req->type) {
1052         case IOREQ_TYPE_PIO:
1053             cpu_ioreq_pio(req);
1054             break;
1055         case IOREQ_TYPE_COPY:
1056             cpu_ioreq_move(req);
1057             break;
1058         case IOREQ_TYPE_VMWARE_PORT:
1059             handle_vmport_ioreq(state, req);
1060             break;
1061         case IOREQ_TYPE_TIMEOFFSET:
1062             break;
1063         case IOREQ_TYPE_INVALIDATE:
1064             xen_invalidate_map_cache();
1065             break;
1066         case IOREQ_TYPE_PCI_CONFIG:
1067             cpu_ioreq_config(state, req);
1068             break;
1069         default:
1070             hw_error("Invalid ioreq type 0x%x\n", req->type);
1071     }
1072     if (req->dir == IOREQ_READ) {
1073         trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
1074                                 req->addr, req->data, req->count, req->size);
1075     }
1076 }
1077 
1078 static int handle_buffered_iopage(XenIOState *state)
1079 {
1080     buffered_iopage_t *buf_page = state->buffered_io_page;
1081     buf_ioreq_t *buf_req = NULL;
1082     ioreq_t req;
1083     int qw;
1084 
1085     if (!buf_page) {
1086         return 0;
1087     }
1088 
1089     memset(&req, 0x00, sizeof(req));
1090     req.state = STATE_IOREQ_READY;
1091     req.count = 1;
1092     req.dir = IOREQ_WRITE;
1093 
1094     for (;;) {
1095         uint32_t rdptr = buf_page->read_pointer, wrptr;
1096 
1097         xen_rmb();
1098         wrptr = buf_page->write_pointer;
1099         xen_rmb();
1100         if (rdptr != buf_page->read_pointer) {
1101             continue;
1102         }
1103         if (rdptr == wrptr) {
1104             break;
1105         }
1106         buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
1107         req.size = 1U << buf_req->size;
1108         req.addr = buf_req->addr;
1109         req.data = buf_req->data;
1110         req.type = buf_req->type;
1111         xen_rmb();
1112         qw = (req.size == 8);
1113         if (qw) {
1114             if (rdptr + 1 == wrptr) {
1115                 hw_error("Incomplete quad word buffered ioreq");
1116             }
1117             buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
1118                                            IOREQ_BUFFER_SLOT_NUM];
1119             req.data |= ((uint64_t)buf_req->data) << 32;
1120             xen_rmb();
1121         }
1122 
1123         handle_ioreq(state, &req);
1124 
1125         /* Only req.data may get updated by handle_ioreq(), albeit even that
1126          * should not happen as such data would never make it to the guest (we
1127          * can only usefully see writes here after all).
1128          */
1129         assert(req.state == STATE_IOREQ_READY);
1130         assert(req.count == 1);
1131         assert(req.dir == IOREQ_WRITE);
1132         assert(!req.data_is_ptr);
1133 
1134         atomic_add(&buf_page->read_pointer, qw + 1);
1135     }
1136 
1137     return req.count;
1138 }
1139 
1140 static void handle_buffered_io(void *opaque)
1141 {
1142     XenIOState *state = opaque;
1143 
1144     if (handle_buffered_iopage(state)) {
1145         timer_mod(state->buffered_io_timer,
1146                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
1147     } else {
1148         timer_del(state->buffered_io_timer);
1149         xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
1150     }
1151 }
1152 
1153 static void cpu_handle_ioreq(void *opaque)
1154 {
1155     XenIOState *state = opaque;
1156     ioreq_t *req = cpu_get_ioreq(state);
1157 
1158     handle_buffered_iopage(state);
1159     if (req) {
1160         ioreq_t copy = *req;
1161 
1162         xen_rmb();
1163         handle_ioreq(state, &copy);
1164         req->data = copy.data;
1165 
1166         if (req->state != STATE_IOREQ_INPROCESS) {
1167             fprintf(stderr, "Badness in I/O request ... not in service?!: "
1168                     "%x, ptr: %x, port: %"PRIx64", "
1169                     "data: %"PRIx64", count: %u, size: %u, type: %u\n",
1170                     req->state, req->data_is_ptr, req->addr,
1171                     req->data, req->count, req->size, req->type);
1172             destroy_hvm_domain(false);
1173             return;
1174         }
1175 
1176         xen_wmb(); /* Update ioreq contents /then/ update state. */
1177 
1178         /*
1179          * We do this before we send the response so that the tools
1180          * have the opportunity to pick up on the reset before the
1181          * guest resumes and does a hlt with interrupts disabled which
1182          * causes Xen to powerdown the domain.
1183          */
1184         if (runstate_is_running()) {
1185             ShutdownCause request;
1186 
1187             if (qemu_shutdown_requested_get()) {
1188                 destroy_hvm_domain(false);
1189             }
1190             request = qemu_reset_requested_get();
1191             if (request) {
1192                 qemu_system_reset(request);
1193                 destroy_hvm_domain(true);
1194             }
1195         }
1196 
1197         req->state = STATE_IORESP_READY;
1198         xenevtchn_notify(state->xce_handle,
1199                          state->ioreq_local_port[state->send_vcpu]);
1200     }
1201 }
1202 
1203 static void xen_main_loop_prepare(XenIOState *state)
1204 {
1205     int evtchn_fd = -1;
1206 
1207     if (state->xce_handle != NULL) {
1208         evtchn_fd = xenevtchn_fd(state->xce_handle);
1209     }
1210 
1211     state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
1212                                                  state);
1213 
1214     if (evtchn_fd != -1) {
1215         CPUState *cpu_state;
1216 
1217         DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
1218         CPU_FOREACH(cpu_state) {
1219             DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
1220                     __func__, cpu_state->cpu_index, cpu_state);
1221             state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
1222         }
1223         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
1224     }
1225 }
1226 
1227 
1228 static void xen_hvm_change_state_handler(void *opaque, int running,
1229                                          RunState rstate)
1230 {
1231     XenIOState *state = opaque;
1232 
1233     if (running) {
1234         xen_main_loop_prepare(state);
1235     }
1236 
1237     xen_set_ioreq_server_state(xen_domid,
1238                                state->ioservid,
1239                                (rstate == RUN_STATE_RUNNING));
1240 }
1241 
1242 static void xen_exit_notifier(Notifier *n, void *data)
1243 {
1244     XenIOState *state = container_of(n, XenIOState, exit);
1245 
1246     xenevtchn_close(state->xce_handle);
1247     xs_daemon_close(state->xenstore);
1248 }
1249 
1250 #ifdef XEN_COMPAT_PHYSMAP
1251 static void xen_read_physmap(XenIOState *state)
1252 {
1253     XenPhysmap *physmap = NULL;
1254     unsigned int len, num, i;
1255     char path[80], *value = NULL;
1256     char **entries = NULL;
1257 
1258     snprintf(path, sizeof(path),
1259             "/local/domain/0/device-model/%d/physmap", xen_domid);
1260     entries = xs_directory(state->xenstore, 0, path, &num);
1261     if (entries == NULL)
1262         return;
1263 
1264     for (i = 0; i < num; i++) {
1265         physmap = g_malloc(sizeof (XenPhysmap));
1266         physmap->phys_offset = strtoull(entries[i], NULL, 16);
1267         snprintf(path, sizeof(path),
1268                 "/local/domain/0/device-model/%d/physmap/%s/start_addr",
1269                 xen_domid, entries[i]);
1270         value = xs_read(state->xenstore, 0, path, &len);
1271         if (value == NULL) {
1272             g_free(physmap);
1273             continue;
1274         }
1275         physmap->start_addr = strtoull(value, NULL, 16);
1276         free(value);
1277 
1278         snprintf(path, sizeof(path),
1279                 "/local/domain/0/device-model/%d/physmap/%s/size",
1280                 xen_domid, entries[i]);
1281         value = xs_read(state->xenstore, 0, path, &len);
1282         if (value == NULL) {
1283             g_free(physmap);
1284             continue;
1285         }
1286         physmap->size = strtoull(value, NULL, 16);
1287         free(value);
1288 
1289         snprintf(path, sizeof(path),
1290                 "/local/domain/0/device-model/%d/physmap/%s/name",
1291                 xen_domid, entries[i]);
1292         physmap->name = xs_read(state->xenstore, 0, path, &len);
1293 
1294         QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
1295     }
1296     free(entries);
1297 }
1298 #else
1299 static void xen_read_physmap(XenIOState *state)
1300 {
1301 }
1302 #endif
1303 
1304 static void xen_wakeup_notifier(Notifier *notifier, void *data)
1305 {
1306     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
1307 }
1308 
1309 static int xen_map_ioreq_server(XenIOState *state)
1310 {
1311     void *addr = NULL;
1312     xenforeignmemory_resource_handle *fres;
1313     xen_pfn_t ioreq_pfn;
1314     xen_pfn_t bufioreq_pfn;
1315     evtchn_port_t bufioreq_evtchn;
1316     int rc;
1317 
1318     /*
1319      * Attempt to map using the resource API and fall back to normal
1320      * foreign mapping if this is not supported.
1321      */
1322     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
1323     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
1324     fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
1325                                          XENMEM_resource_ioreq_server,
1326                                          state->ioservid, 0, 2,
1327                                          &addr,
1328                                          PROT_READ | PROT_WRITE, 0);
1329     if (fres != NULL) {
1330         trace_xen_map_resource_ioreq(state->ioservid, addr);
1331         state->buffered_io_page = addr;
1332         state->shared_page = addr + TARGET_PAGE_SIZE;
1333     } else if (errno != EOPNOTSUPP) {
1334         error_report("failed to map ioreq server resources: error %d handle=%p",
1335                      errno, xen_xc);
1336         return -1;
1337     }
1338 
1339     rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
1340                                    (state->shared_page == NULL) ?
1341                                    &ioreq_pfn : NULL,
1342                                    (state->buffered_io_page == NULL) ?
1343                                    &bufioreq_pfn : NULL,
1344                                    &bufioreq_evtchn);
1345     if (rc < 0) {
1346         error_report("failed to get ioreq server info: error %d handle=%p",
1347                      errno, xen_xc);
1348         return rc;
1349     }
1350 
1351     if (state->shared_page == NULL) {
1352         DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
1353 
1354         state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
1355                                                   PROT_READ | PROT_WRITE,
1356                                                   1, &ioreq_pfn, NULL);
1357         if (state->shared_page == NULL) {
1358             error_report("map shared IO page returned error %d handle=%p",
1359                          errno, xen_xc);
1360         }
1361     }
1362 
1363     if (state->buffered_io_page == NULL) {
1364         DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
1365 
1366         state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
1367                                                        PROT_READ | PROT_WRITE,
1368                                                        1, &bufioreq_pfn,
1369                                                        NULL);
1370         if (state->buffered_io_page == NULL) {
1371             error_report("map buffered IO page returned error %d", errno);
1372             return -1;
1373         }
1374     }
1375 
1376     if (state->shared_page == NULL || state->buffered_io_page == NULL) {
1377         return -1;
1378     }
1379 
1380     DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
1381 
1382     state->bufioreq_remote_port = bufioreq_evtchn;
1383 
1384     return 0;
1385 }
1386 
/*
 * Set up Xen HVM support for the machine.
 *
 * In order: opens the event channel and xenstore handles, creates the ioreq
 * server, registers exit/suspend/wakeup notifiers, maps the ioreq pages and
 * (when available) the shared vmport page, enables the ioreq server, binds
 * one event channel per possible vcpu plus the buffered-I/O channel,
 * initialises the map cache and RAM, registers the memory/io/device
 * listeners, initialises the Xen bus and backends, reads the physmap and
 * finally disables the ACPI table build.
 *
 * @pcms:       machine state; its acpi_build_enabled flag is cleared.
 * @ram_memory: passed through to xen_ram_init().  Note that this parameter
 *              shadows the file-scope variable of the same name.
 *
 * Any failure is reported and terminates the process with exit(1).
 */
void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
{
    MachineState *ms = MACHINE(pcms);
    unsigned int max_cpus = ms->smp.max_cpus;
    int i, rc;
    xen_pfn_t ioreq_pfn;
    XenIOState *state;

    /* Zero-filled, so every pointer member starts out NULL. */
    state = g_malloc0(sizeof (XenIOState));

    state->xce_handle = xenevtchn_open(NULL, 0);
    if (state->xce_handle == NULL) {
        perror("xen: event channel open");
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        perror("xen: xenstore open");
        goto err;
    }

    xen_create_ioreq_server(xen_domid, &state->ioservid);

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    state->suspend.notify = xen_suspend_notifier;
    qemu_register_suspend_notifier(&state->suspend);

    state->wakeup.notify = xen_wakeup_notifier;
    qemu_register_wakeup_notifier(&state->wakeup);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    rc = xen_map_ioreq_server(state);
    if (rc < 0) {
        goto err;
    }

    /*
     * The vmport register page is optional: -ENOSYS from the lookup is
     * tolerated and leaves shared_vmport_page unset; any other error is
     * fatal.
     */
    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
    if (!rc) {
        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
        state->shared_vmport_page =
            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
                                 1, &ioreq_pfn, NULL);
        if (state->shared_vmport_page == NULL) {
            error_report("map shared vmport IO page returned error %d handle=%p",
                         errno, xen_xc);
            goto err;
        }
    } else if (rc != -ENOSYS) {
        error_report("get vmport regs pfn returned error %d, rc=%d",
                     errno, rc);
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));

    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));

    /* Bind a local port to each vcpu's remote port from the shared page. */
    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                        xen_vcpu_eport(state->shared_page, i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    /* Bind the buffered-I/O port recorded by xen_map_ioreq_server(). */
    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                    state->bufioreq_remote_port);
    if (rc == -1) {
        error_report("buffered evtchn bind error %d", errno);
        goto err;
    }
    state->bufioreq_local_port = rc;

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif
    xen_ram_init(pcms, ram_size, ram_memory);

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);
    state->log_for_dirtybit = NULL;

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    /* The device list must be initialised before the listener can fire. */
    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    xen_bus_init();

    /* Initialize backend core & drivers */
    if (xen_be_init() != 0) {
        error_report("xen backend core setup failed");
        goto err;
    }
    xen_be_register_common();

    QLIST_INIT(&xen_physmap);
    xen_read_physmap(state);

    /* Disable ACPI build because Xen handles it */
    pcms->acpi_build_enabled = false;

    return;

err:
    /* No cleanup is attempted: initialisation failure ends the process. */
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}
1520 
1521 void destroy_hvm_domain(bool reboot)
1522 {
1523     xc_interface *xc_handle;
1524     int sts;
1525     int rc;
1526 
1527     unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
1528 
1529     if (xen_dmod) {
1530         rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
1531         if (!rc) {
1532             return;
1533         }
1534         if (errno != ENOTTY /* old Xen */) {
1535             perror("xendevicemodel_shutdown failed");
1536         }
1537         /* well, try the old thing then */
1538     }
1539 
1540     xc_handle = xc_interface_open(0, 0, 0);
1541     if (xc_handle == NULL) {
1542         fprintf(stderr, "Cannot acquire xenctrl handle\n");
1543     } else {
1544         sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
1545         if (sts != 0) {
1546             fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
1547                     "sts %d, %s\n", reboot ? "reboot" : "poweroff",
1548                     sts, strerror(errno));
1549         } else {
1550             fprintf(stderr, "Issued domain %d %s\n", xen_domid,
1551                     reboot ? "reboot" : "poweroff");
1552         }
1553         xc_interface_close(xc_handle);
1554     }
1555 }
1556 
/*
 * Record @mr in the file-scope `framebuffer` pointer.  Only the pointer is
 * stored; a later call replaces the previously recorded region.
 */
void xen_register_framebuffer(MemoryRegion *mr)
{
    framebuffer = mr;
}
1561 
1562 void xen_shutdown_fatal_error(const char *fmt, ...)
1563 {
1564     va_list ap;
1565 
1566     va_start(ap, fmt);
1567     vfprintf(stderr, fmt, ap);
1568     va_end(ap);
1569     fprintf(stderr, "Will destroy the domain.\n");
1570     /* destroy the domain */
1571     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
1572 }
1573 
1574 void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
1575 {
1576     if (unlikely(xen_in_migration)) {
1577         int rc;
1578         ram_addr_t start_pfn, nb_pages;
1579 
1580         start = xen_phys_offset_to_gaddr(start, length);
1581 
1582         if (length == 0) {
1583             length = TARGET_PAGE_SIZE;
1584         }
1585         start_pfn = start >> TARGET_PAGE_BITS;
1586         nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
1587             - start_pfn;
1588         rc = xen_modified_memory(xen_domid, start_pfn, nb_pages);
1589         if (rc) {
1590             fprintf(stderr,
1591                     "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
1592                     __func__, start, nb_pages, errno, strerror(errno));
1593         }
1594     }
1595 }
1596 
1597 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
1598 {
1599     if (enable) {
1600         memory_global_dirty_log_start();
1601     } else {
1602         memory_global_dirty_log_stop();
1603     }
1604 }
1605