xref: /openbmc/qemu/hw/xen/xen-hvm-common.c (revision 76bccf3c)
1 #include "qemu/osdep.h"
2 #include "qemu/units.h"
3 #include "qapi/error.h"
4 #include "exec/target_page.h"
5 #include "trace.h"
6 
7 #include "hw/pci/pci_host.h"
8 #include "hw/xen/xen-hvm-common.h"
9 #include "hw/xen/xen-bus.h"
10 #include "hw/boards.h"
11 #include "hw/xen/arch_hvm.h"
12 
/*
 * Regions that xen_mr_is_memory() and xen_mr_is_grants() compare against
 * by address to recognise Xen memory and Xen grant mappings.
 */
MemoryRegion xen_memory, xen_grants;
14 
15 /* Check for any kind of xen memory, foreign mappings or grants.  */
16 bool xen_mr_is_memory(MemoryRegion *mr)
17 {
18     return mr == &xen_memory || mr == &xen_grants;
19 }
20 
21 /* Check specifically for grants.  */
22 bool xen_mr_is_grants(MemoryRegion *mr)
23 {
24     return mr == &xen_grants;
25 }
26 
27 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
28                    Error **errp)
29 {
30     unsigned target_page_bits = qemu_target_page_bits();
31     unsigned long nr_pfn;
32     xen_pfn_t *pfn_list;
33     int i;
34 
35     if (runstate_check(RUN_STATE_INMIGRATE)) {
36         /* RAM already populated in Xen */
37         warn_report("%s: do not alloc "RAM_ADDR_FMT
38                 " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
39                 __func__, size, ram_addr);
40         return;
41     }
42 
43     if (xen_mr_is_memory(mr)) {
44         return;
45     }
46 
47     trace_xen_ram_alloc(ram_addr, size);
48 
49     nr_pfn = size >> target_page_bits;
50     pfn_list = g_new(xen_pfn_t, nr_pfn);
51 
52     for (i = 0; i < nr_pfn; i++) {
53         pfn_list[i] = (ram_addr >> target_page_bits) + i;
54     }
55 
56     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
57         error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
58                    ram_addr);
59     }
60 
61     g_free(pfn_list);
62 }
63 
64 static void xen_set_memory(struct MemoryListener *listener,
65                            MemoryRegionSection *section,
66                            bool add)
67 {
68     XenIOState *state = container_of(listener, XenIOState, memory_listener);
69 
70     if (xen_mr_is_memory(section->mr)) {
71         return;
72     } else {
73         if (add) {
74             xen_map_memory_section(xen_domid, state->ioservid,
75                                    section);
76         } else {
77             xen_unmap_memory_section(xen_domid, state->ioservid,
78                                      section);
79         }
80     }
81 
82     arch_xen_set_memory(state, section, add);
83 }
84 
85 void xen_region_add(MemoryListener *listener,
86                            MemoryRegionSection *section)
87 {
88     memory_region_ref(section->mr);
89     xen_set_memory(listener, section, true);
90 }
91 
92 void xen_region_del(MemoryListener *listener,
93                            MemoryRegionSection *section)
94 {
95     xen_set_memory(listener, section, false);
96     memory_region_unref(section->mr);
97 }
98 
99 void xen_io_add(MemoryListener *listener,
100                        MemoryRegionSection *section)
101 {
102     XenIOState *state = container_of(listener, XenIOState, io_listener);
103     MemoryRegion *mr = section->mr;
104 
105     if (mr->ops == &unassigned_io_ops) {
106         return;
107     }
108 
109     memory_region_ref(mr);
110 
111     xen_map_io_section(xen_domid, state->ioservid, section);
112 }
113 
114 void xen_io_del(MemoryListener *listener,
115                        MemoryRegionSection *section)
116 {
117     XenIOState *state = container_of(listener, XenIOState, io_listener);
118     MemoryRegion *mr = section->mr;
119 
120     if (mr->ops == &unassigned_io_ops) {
121         return;
122     }
123 
124     xen_unmap_io_section(xen_domid, state->ioservid, section);
125 
126     memory_region_unref(mr);
127 }
128 
129 void xen_device_realize(DeviceListener *listener,
130                                DeviceState *dev)
131 {
132     XenIOState *state = container_of(listener, XenIOState, device_listener);
133 
134     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
135         PCIDevice *pci_dev = PCI_DEVICE(dev);
136         XenPciDevice *xendev = g_new(XenPciDevice, 1);
137 
138         xendev->pci_dev = pci_dev;
139         xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
140                                      pci_dev->devfn);
141         QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
142 
143         xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
144     }
145 }
146 
147 void xen_device_unrealize(DeviceListener *listener,
148                                  DeviceState *dev)
149 {
150     XenIOState *state = container_of(listener, XenIOState, device_listener);
151 
152     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
153         PCIDevice *pci_dev = PCI_DEVICE(dev);
154         XenPciDevice *xendev, *next;
155 
156         xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
157 
158         QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
159             if (xendev->pci_dev == pci_dev) {
160                 QLIST_REMOVE(xendev, entry);
161                 g_free(xendev);
162                 break;
163             }
164         }
165     }
166 }
167 
/*
 * Listener template for the I/O address space; xen_do_ioreq_register()
 * copies it into XenIOState before registering it on address_space_io.
 */
MemoryListener xen_io_listener = {
    .name = "xen-io",
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};
174 
/*
 * Device listener template; xen_do_ioreq_register() copies it into
 * XenIOState before registering it.
 */
DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};
179 
/*
 * Get the ioreq packet for @vcpu from the shared page.
 *
 * Returns NULL (after tracing the slot's contents) unless the slot's state
 * is STATE_IOREQ_READY. Otherwise the slot is switched to
 * STATE_IOREQ_INPROCESS and a pointer to it is returned.
 */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
                                                             req->data_is_ptr,
                                                             req->addr,
                                                             req->data,
                                                             req->count,
                                                             req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    /* Claim the request before handing it to the caller. */
    req->state = STATE_IOREQ_INPROCESS;
    return req;
}
200 
201 /* use poll to get the port notification */
202 /* ioreq_vec--out,the */
203 /* retval--the number of ioreq packet */
204 static ioreq_t *cpu_get_ioreq(XenIOState *state)
205 {
206     MachineState *ms = MACHINE(qdev_get_machine());
207     unsigned int max_cpus = ms->smp.max_cpus;
208     int i;
209     evtchn_port_t port;
210 
211     port = qemu_xen_evtchn_pending(state->xce_handle);
212     if (port == state->bufioreq_local_port) {
213         timer_mod(state->buffered_io_timer,
214                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
215         return NULL;
216     }
217 
218     if (port != -1) {
219         for (i = 0; i < max_cpus; i++) {
220             if (state->ioreq_local_port[i] == port) {
221                 break;
222             }
223         }
224 
225         if (i == max_cpus) {
226             hw_error("Fatal error while trying to get io event!\n");
227         }
228 
229         /* unmask the wanted port again */
230         qemu_xen_evtchn_unmask(state->xce_handle, port);
231 
232         /* get the io packet from shared memory */
233         state->send_vcpu = i;
234         return cpu_get_ioreq_from_shared_memory(state, i);
235     }
236 
237     /* read error or read nothing */
238     return NULL;
239 }
240 
241 static uint32_t do_inp(uint32_t addr, unsigned long size)
242 {
243     switch (size) {
244         case 1:
245             return cpu_inb(addr);
246         case 2:
247             return cpu_inw(addr);
248         case 4:
249             return cpu_inl(addr);
250         default:
251             hw_error("inp: bad size: %04x %lx", addr, size);
252     }
253 }
254 
255 static void do_outp(uint32_t addr,
256         unsigned long size, uint32_t val)
257 {
258     switch (size) {
259         case 1:
260             return cpu_outb(addr, val);
261         case 2:
262             return cpu_outw(addr, val);
263         case 4:
264             return cpu_outl(addr, val);
265         default:
266             hw_error("outp: bad size: %04x %lx", addr, size);
267     }
268 }
269 
270 /*
271  * Helper functions which read/write an object from/to physical guest
272  * memory, as part of the implementation of an ioreq.
273  *
274  * Equivalent to
275  *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
276  *                          val, req->size, 0/1)
277  * except without the integer overflow problems.
278  */
279 static void rw_phys_req_item(hwaddr addr,
280                              ioreq_t *req, uint32_t i, void *val, int rw)
281 {
282     /* Do everything unsigned so overflow just results in a truncated result
283      * and accesses to undesired parts of guest memory, which is up
284      * to the guest */
285     hwaddr offset = (hwaddr)req->size * i;
286     if (req->df) {
287         addr -= offset;
288     } else {
289         addr += offset;
290     }
291     cpu_physical_memory_rw(addr, val, req->size, rw);
292 }
293 
294 static inline void read_phys_req_item(hwaddr addr,
295                                       ioreq_t *req, uint32_t i, void *val)
296 {
297     rw_phys_req_item(addr, req, i, val, 0);
298 }
299 static inline void write_phys_req_item(hwaddr addr,
300                                        ioreq_t *req, uint32_t i, void *val)
301 {
302     rw_phys_req_item(addr, req, i, val, 1);
303 }
304 
305 
306 void cpu_ioreq_pio(ioreq_t *req)
307 {
308     uint32_t i;
309 
310     trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
311                          req->data, req->count, req->size);
312 
313     if (req->size > sizeof(uint32_t)) {
314         hw_error("PIO: bad size (%u)", req->size);
315     }
316 
317     if (req->dir == IOREQ_READ) {
318         if (!req->data_is_ptr) {
319             req->data = do_inp(req->addr, req->size);
320             trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
321                                          req->size);
322         } else {
323             uint32_t tmp;
324 
325             for (i = 0; i < req->count; i++) {
326                 tmp = do_inp(req->addr, req->size);
327                 write_phys_req_item(req->data, req, i, &tmp);
328             }
329         }
330     } else if (req->dir == IOREQ_WRITE) {
331         if (!req->data_is_ptr) {
332             trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
333                                           req->size);
334             do_outp(req->addr, req->size, req->data);
335         } else {
336             for (i = 0; i < req->count; i++) {
337                 uint32_t tmp = 0;
338 
339                 read_phys_req_item(req->data, req, i, &tmp);
340                 do_outp(req->addr, req->size, tmp);
341             }
342         }
343     }
344 }
345 
346 static void cpu_ioreq_move(ioreq_t *req)
347 {
348     uint32_t i;
349 
350     trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
351                          req->data, req->count, req->size);
352 
353     if (req->size > sizeof(req->data)) {
354         hw_error("MMIO: bad size (%u)", req->size);
355     }
356 
357     if (!req->data_is_ptr) {
358         if (req->dir == IOREQ_READ) {
359             for (i = 0; i < req->count; i++) {
360                 read_phys_req_item(req->addr, req, i, &req->data);
361             }
362         } else if (req->dir == IOREQ_WRITE) {
363             for (i = 0; i < req->count; i++) {
364                 write_phys_req_item(req->addr, req, i, &req->data);
365             }
366         }
367     } else {
368         uint64_t tmp;
369 
370         if (req->dir == IOREQ_READ) {
371             for (i = 0; i < req->count; i++) {
372                 read_phys_req_item(req->addr, req, i, &tmp);
373                 write_phys_req_item(req->data, req, i, &tmp);
374             }
375         } else if (req->dir == IOREQ_WRITE) {
376             for (i = 0; i < req->count; i++) {
377                 read_phys_req_item(req->data, req, i, &tmp);
378                 write_phys_req_item(req->addr, req, i, &tmp);
379             }
380         }
381     }
382 }
383 
384 static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
385 {
386     uint32_t sbdf = req->addr >> 32;
387     uint32_t reg = req->addr;
388     XenPciDevice *xendev;
389 
390     if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
391         req->size != sizeof(uint32_t)) {
392         hw_error("PCI config access: bad size (%u)", req->size);
393     }
394 
395     if (req->count != 1) {
396         hw_error("PCI config access: bad count (%u)", req->count);
397     }
398 
399     QLIST_FOREACH(xendev, &state->dev_list, entry) {
400         if (xendev->sbdf != sbdf) {
401             continue;
402         }
403 
404         if (!req->data_is_ptr) {
405             if (req->dir == IOREQ_READ) {
406                 req->data = pci_host_config_read_common(
407                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
408                     req->size);
409                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
410                                             req->size, req->data);
411             } else if (req->dir == IOREQ_WRITE) {
412                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
413                                              req->size, req->data);
414                 pci_host_config_write_common(
415                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
416                     req->data, req->size);
417             }
418         } else {
419             uint32_t tmp;
420 
421             if (req->dir == IOREQ_READ) {
422                 tmp = pci_host_config_read_common(
423                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
424                     req->size);
425                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
426                                             req->size, tmp);
427                 write_phys_req_item(req->data, req, 0, &tmp);
428             } else if (req->dir == IOREQ_WRITE) {
429                 read_phys_req_item(req->data, req, 0, &tmp);
430                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
431                                              req->size, tmp);
432                 pci_host_config_write_common(
433                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
434                     tmp, req->size);
435             }
436         }
437     }
438 }
439 
440 static void handle_ioreq(XenIOState *state, ioreq_t *req)
441 {
442     trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
443                        req->addr, req->data, req->count, req->size);
444 
445     if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
446             (req->size < sizeof (target_ulong))) {
447         req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
448     }
449 
450     if (req->dir == IOREQ_WRITE)
451         trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
452                                  req->addr, req->data, req->count, req->size);
453 
454     switch (req->type) {
455         case IOREQ_TYPE_PIO:
456             cpu_ioreq_pio(req);
457             break;
458         case IOREQ_TYPE_COPY:
459             cpu_ioreq_move(req);
460             break;
461         case IOREQ_TYPE_TIMEOFFSET:
462             break;
463         case IOREQ_TYPE_INVALIDATE:
464             xen_invalidate_map_cache();
465             break;
466         case IOREQ_TYPE_PCI_CONFIG:
467             cpu_ioreq_config(state, req);
468             break;
469         default:
470             arch_handle_ioreq(state, req);
471     }
472     if (req->dir == IOREQ_READ) {
473         trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
474                                 req->addr, req->data, req->count, req->size);
475     }
476 }
477 
/*
 * Drain the buffered ioreq ring in state->buffered_io_page, dispatching
 * every entry through handle_ioreq() as a single-count write request.
 *
 * Returns true if at least one request was handled; false when there is no
 * buffered page or the ring was already empty.
 */
static bool handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    bool handled_ioreq = false;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    /* Fields common to every buffered request; checked again after use. */
    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    for (;;) {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        /* Retry if read_pointer changed while write_pointer was sampled. */
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        /* Ring empty: nothing (more) to process. */
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        /*
         * An 8-byte request occupies two consecutive slots; the second
         * slot supplies the upper 32 bits of the data.
         */
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        /* Consume one slot, or two for a quad-word request. */
        qatomic_add(&buf_page->read_pointer, qw + 1);
        handled_ioreq = true;
    }

    return handled_ioreq;
}
541 
542 static void handle_buffered_io(void *opaque)
543 {
544     XenIOState *state = opaque;
545 
546     if (handle_buffered_iopage(state)) {
547         timer_mod(state->buffered_io_timer,
548                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
549     } else {
550         timer_del(state->buffered_io_timer);
551         qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
552     }
553 }
554 
/*
 * Event channel fd handler (installed by xen_main_loop_prepare()).
 *
 * Drains the buffered ioreq ring, then, if cpu_get_ioreq() produced a
 * synchronous request, handles it and notifies the event channel port of
 * the vcpu recorded in state->send_vcpu.
 */
static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    /* Buffered requests are drained whether or not req is NULL. */
    handle_buffered_iopage(state);
    if (req) {
        /* Work on a private copy; only the data field is written back. */
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        /*
         * The shared slot must still be in the state set when the request
         * was fetched; otherwise warn, shut the domain down and return
         * without sending a response.
         */
        if (req->state != STATE_IOREQ_INPROCESS) {
            warn_report("Badness in I/O request ... not in service?!: "
                    "%x, ptr: %x, port: %"PRIx64", "
                    "data: %"PRIx64", count: %u, size: %u, type: %u",
                    req->state, req->data_is_ptr, req->addr,
                    req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            ShutdownCause request;

            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            request = qemu_reset_requested_get();
            if (request) {
                qemu_system_reset(request);
                destroy_hvm_domain(true);
            }
        }

        /* Publish the response and notify the requesting vcpu's port. */
        req->state = STATE_IORESP_READY;
        qemu_xen_evtchn_notify(state->xce_handle,
                               state->ioreq_local_port[state->send_vcpu]);
    }
}
604 
605 static void xen_main_loop_prepare(XenIOState *state)
606 {
607     int evtchn_fd = -1;
608 
609     if (state->xce_handle != NULL) {
610         evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
611     }
612 
613     state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
614                                                  state);
615 
616     if (evtchn_fd != -1) {
617         CPUState *cpu_state;
618 
619         CPU_FOREACH(cpu_state) {
620             trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
621                                                  cpu_state);
622             state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
623         }
624         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
625     }
626 }
627 
628 
629 void xen_hvm_change_state_handler(void *opaque, bool running,
630                                          RunState rstate)
631 {
632     XenIOState *state = opaque;
633 
634     if (running) {
635         xen_main_loop_prepare(state);
636     }
637 
638     xen_set_ioreq_server_state(xen_domid,
639                                state->ioservid,
640                                running);
641 }
642 
643 void xen_exit_notifier(Notifier *n, void *data)
644 {
645     XenIOState *state = container_of(n, XenIOState, exit);
646 
647     xen_destroy_ioreq_server(xen_domid, state->ioservid);
648     if (state->fres != NULL) {
649         xenforeignmemory_unmap_resource(xen_fmem, state->fres);
650     }
651 
652     qemu_xen_evtchn_close(state->xce_handle);
653     xs_daemon_close(state->xenstore);
654 }
655 
656 static int xen_map_ioreq_server(XenIOState *state)
657 {
658     void *addr = NULL;
659     xen_pfn_t ioreq_pfn;
660     xen_pfn_t bufioreq_pfn;
661     evtchn_port_t bufioreq_evtchn;
662     int rc;
663 
664     /*
665      * Attempt to map using the resource API and fall back to normal
666      * foreign mapping if this is not supported.
667      */
668     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
669     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
670     state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
671                                          XENMEM_resource_ioreq_server,
672                                          state->ioservid, 0, 2,
673                                          &addr,
674                                          PROT_READ | PROT_WRITE, 0);
675     if (state->fres != NULL) {
676         trace_xen_map_resource_ioreq(state->ioservid, addr);
677         state->buffered_io_page = addr;
678         state->shared_page = addr + XC_PAGE_SIZE;
679     } else if (errno != EOPNOTSUPP) {
680         error_report("failed to map ioreq server resources: error %d handle=%p",
681                      errno, xen_xc);
682         return -1;
683     }
684 
685     rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
686                                    (state->shared_page == NULL) ?
687                                    &ioreq_pfn : NULL,
688                                    (state->buffered_io_page == NULL) ?
689                                    &bufioreq_pfn : NULL,
690                                    &bufioreq_evtchn);
691     if (rc < 0) {
692         error_report("failed to get ioreq server info: error %d handle=%p",
693                      errno, xen_xc);
694         return rc;
695     }
696 
697     if (state->shared_page == NULL) {
698         trace_xen_map_ioreq_server_shared_page(ioreq_pfn);
699 
700         state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
701                                                   PROT_READ | PROT_WRITE,
702                                                   1, &ioreq_pfn, NULL);
703         if (state->shared_page == NULL) {
704             error_report("map shared IO page returned error %d handle=%p",
705                          errno, xen_xc);
706         }
707     }
708 
709     if (state->buffered_io_page == NULL) {
710         trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);
711 
712         state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
713                                                        PROT_READ | PROT_WRITE,
714                                                        1, &bufioreq_pfn,
715                                                        NULL);
716         if (state->buffered_io_page == NULL) {
717             error_report("map buffered IO page returned error %d", errno);
718             return -1;
719         }
720     }
721 
722     if (state->shared_page == NULL || state->buffered_io_page == NULL) {
723         return -1;
724     }
725 
726     trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
727 
728     state->bufioreq_remote_port = bufioreq_evtchn;
729 
730     return 0;
731 }
732 
733 void destroy_hvm_domain(bool reboot)
734 {
735     xc_interface *xc_handle;
736     int sts;
737     int rc;
738 
739     unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
740 
741     if (xen_dmod) {
742         rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
743         if (!rc) {
744             return;
745         }
746         if (errno != ENOTTY /* old Xen */) {
747             error_report("xendevicemodel_shutdown failed with error %d", errno);
748         }
749         /* well, try the old thing then */
750     }
751 
752     xc_handle = xc_interface_open(0, 0, 0);
753     if (xc_handle == NULL) {
754         trace_destroy_hvm_domain_cannot_acquire_handle();
755     } else {
756         sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
757         if (sts != 0) {
758             trace_destroy_hvm_domain_failed_action(
759                 reboot ? "reboot" : "poweroff", sts, strerror(errno)
760             );
761         } else {
762             trace_destroy_hvm_domain_action(
763                 xen_domid, reboot ? "reboot" : "poweroff"
764             );
765         }
766         xc_interface_close(xc_handle);
767     }
768 }
769 
770 void xen_shutdown_fatal_error(const char *fmt, ...)
771 {
772     va_list ap;
773 
774     va_start(ap, fmt);
775     error_vreport(fmt, ap);
776     va_end(ap);
777     error_report("Will destroy the domain.");
778     /* destroy the domain */
779     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
780 }
781 
782 static void xen_do_ioreq_register(XenIOState *state,
783                                   unsigned int max_cpus,
784                                   const MemoryListener *xen_memory_listener)
785 {
786     int i, rc;
787 
788     state->exit.notify = xen_exit_notifier;
789     qemu_add_exit_notifier(&state->exit);
790 
791     /*
792      * Register wake-up support in QMP query-current-machine API
793      */
794     qemu_register_wakeup_support();
795 
796     rc = xen_map_ioreq_server(state);
797     if (rc < 0) {
798         goto err;
799     }
800 
801     /* Note: cpus is empty at this point in init */
802     state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);
803 
804     rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
805     if (rc < 0) {
806         error_report("failed to enable ioreq server info: error %d handle=%p",
807                      errno, xen_xc);
808         goto err;
809     }
810 
811     state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);
812 
813     /* FIXME: how about if we overflow the page here? */
814     for (i = 0; i < max_cpus; i++) {
815         rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
816                                               xen_vcpu_eport(state->shared_page,
817                                                              i));
818         if (rc == -1) {
819             error_report("shared evtchn %d bind error %d", i, errno);
820             goto err;
821         }
822         state->ioreq_local_port[i] = rc;
823     }
824 
825     rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
826                                           state->bufioreq_remote_port);
827     if (rc == -1) {
828         error_report("buffered evtchn bind error %d", errno);
829         goto err;
830     }
831     state->bufioreq_local_port = rc;
832 
833     /* Init RAM management */
834 #ifdef XEN_COMPAT_PHYSMAP
835     xen_map_cache_init(xen_phys_offset_to_gaddr, state);
836 #else
837     xen_map_cache_init(NULL, state);
838 #endif
839 
840     qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
841 
842     state->memory_listener = *xen_memory_listener;
843     memory_listener_register(&state->memory_listener, &address_space_memory);
844 
845     state->io_listener = xen_io_listener;
846     memory_listener_register(&state->io_listener, &address_space_io);
847 
848     state->device_listener = xen_device_listener;
849     QLIST_INIT(&state->dev_list);
850     device_listener_register(&state->device_listener);
851 
852     return;
853 
854 err:
855     error_report("xen hardware virtual machine initialisation failed");
856     exit(1);
857 }
858 
859 void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
860                         const MemoryListener *xen_memory_listener)
861 {
862     int rc;
863 
864     setup_xen_backend_ops();
865 
866     state->xce_handle = qemu_xen_evtchn_open();
867     if (state->xce_handle == NULL) {
868         error_report("xen: event channel open failed with error %d", errno);
869         goto err;
870     }
871 
872     state->xenstore = xs_daemon_open();
873     if (state->xenstore == NULL) {
874         error_report("xen: xenstore open failed with error %d", errno);
875         goto err;
876     }
877 
878     rc = xen_create_ioreq_server(xen_domid, &state->ioservid);
879     if (!rc) {
880         xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
881     } else {
882         warn_report("xen: failed to create ioreq server");
883     }
884 
885     xen_bus_init();
886 
887     return;
888 
889 err:
890     error_report("xen hardware virtual machine backend registration failed");
891     exit(1);
892 }
893