/* xref: /openbmc/qemu/hw/xen/xen-hvm-common.c (revision 410b4d56) */
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "exec/target_page.h"
#include "trace.h"

#include "hw/pci/pci_host.h"
#include "hw/xen/xen-hvm-common.h"
#include "hw/xen/xen-bus.h"
#include "hw/boards.h"
#include "hw/xen/arch_hvm.h"

MemoryRegion xen_memory, xen_grants;

/* Check for any kind of xen memory, foreign mappings or grants.  */
bool xen_mr_is_memory(MemoryRegion *mr)
{
    return mr == &xen_memory || mr == &xen_grants;
}

/* Check specifically for grants.  */
bool xen_mr_is_grants(MemoryRegion *mr)
{
    return mr == &xen_grants;
}

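/*
 * Allocate guest RAM backing by asking Xen to populate the physmap for the
 * given range.  This is a no-op while inmigrate (the pages already exist in
 * Xen) and for the xen_memory/xen_grants regions themselves.
 */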
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned target_page_bits = qemu_target_page_bits();
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        warn_report("%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
                __func__, size, ram_addr);
        return;
    }

    if (xen_mr_is_memory(mr)) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> target_page_bits;
    pfn_list = g_new(xen_pfn_t, nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> target_page_bits) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}

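/*
 * Tell the ioreq server about an MMIO section appearing or disappearing,
 * then give the architecture-specific hook a chance to act on it.  The
 * xen_memory/xen_grants regions themselves are skipped entirely.
 */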
static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (xen_mr_is_memory(section->mr)) {
        return;
    } else {
        if (add) {
            xen_map_memory_section(xen_domid, state->ioservid,
                                   section);
        } else {
            xen_unmap_memory_section(xen_domid, state->ioservid,
                                     section);
        }
    }

    arch_xen_set_memory(state, section, add);
}

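/*
 * Memory listener callbacks: keep a reference on the region while it is
 * registered with the ioreq server.
 */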
void xen_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

void xen_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

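/*
 * Port I/O listener callbacks: register/unregister port I/O sections with
 * the ioreq server, ignoring the catch-all unassigned_io_ops region.
 */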
void xen_io_add(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    memory_region_ref(mr);

    xen_map_io_section(xen_domid, state->ioservid, section);
}

void xen_io_del(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    xen_unmap_io_section(xen_domid, state->ioservid, section);

    memory_region_unref(mr);
}

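/*
 * Device listener callbacks: track PCI devices on dev_list so that config
 * space ioreqs can be routed to them by SBDF, and map/unmap the device with
 * the ioreq server.
 */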
void xen_device_realize(DeviceListener *listener,
                               DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev = g_new(XenPciDevice, 1);

        xendev->pci_dev = pci_dev;
        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
                                     pci_dev->devfn);
        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);

        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    }
}

void xen_device_unrealize(DeviceListener *listener,
                                 DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev, *next;

        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);

        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
            if (xendev->pci_dev == pci_dev) {
                QLIST_REMOVE(xendev, entry);
                g_free(xendev);
                break;
            }
        }
    }
}

MemoryListener xen_io_listener = {
    .name = "xen-io",
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};

/* Get the pending ioreq packet for a vcpu from the shared ioreq page. */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
                                                             req->data_is_ptr,
                                                             req->addr,
                                                             req->data,
                                                             req->count,
                                                             req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Poll the event channel for a port notification and return the pending
 * ioreq for the corresponding vcpu, or NULL if nothing is ready (a
 * notification on the buffered-ioreq port is deferred to a timer instead).
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    int i;
    evtchn_port_t port;

    port = qemu_xen_evtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        qemu_xen_evtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}

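/*
 * Port I/O accessors for the requested access width; sizes other than
 * 1, 2 or 4 bytes are fatal.
 */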
static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    switch (size) {
        case 1:
            return cpu_inb(addr);
        case 2:
            return cpu_inw(addr);
        case 4:
            return cpu_inl(addr);
        default:
            hw_error("inp: bad size: %04x %lx", addr, size);
    }
}

static void do_outp(uint32_t addr,
        unsigned long size, uint32_t val)
{
    switch (size) {
        case 1:
            return cpu_outb(addr, val);
        case 2:
            return cpu_outw(addr, val);
        case 4:
            return cpu_outl(addr, val);
        default:
            hw_error("outp: bad size: %04x %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /* Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}
static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}

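/*
 * Handle a port I/O request: either transfer directly to/from req->data or,
 * for data_is_ptr requests, loop over req->count items in guest memory.
 */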
void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                          req->size);
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}

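/*
 * Handle an MMIO copy request: move data between req->addr and either
 * req->data (immediate) or the guest memory that req->data points at.
 */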
static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}

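/*
 * Handle a PCI config space access: req->addr carries the SBDF in its upper
 * 32 bits and the register offset in the lower bits; look the device up in
 * dev_list and forward the access through the PCI host helpers.
 */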
static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
{
    uint32_t sbdf = req->addr >> 32;
    uint32_t reg = req->addr;
    XenPciDevice *xendev;

    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
        req->size != sizeof(uint32_t)) {
        hw_error("PCI config access: bad size (%u)", req->size);
    }

    if (req->count != 1) {
        hw_error("PCI config access: bad count (%u)", req->count);
    }

    QLIST_FOREACH(xendev, &state->dev_list, entry) {
        if (xendev->sbdf != sbdf) {
            continue;
        }

        if (!req->data_is_ptr) {
            if (req->dir == IOREQ_READ) {
                req->data = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, req->data);
            } else if (req->dir == IOREQ_WRITE) {
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, req->data);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->data, req->size);
            }
        } else {
            uint32_t tmp;

            if (req->dir == IOREQ_READ) {
                tmp = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, tmp);
                write_phys_req_item(req->data, req, 0, &tmp);
            } else if (req->dir == IOREQ_WRITE) {
                read_phys_req_item(req->data, req, 0, &tmp);
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, tmp);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    tmp, req->size);
            }
        }
    }
}

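/*
 * Dispatch a single ioreq to the appropriate handler based on req->type,
 * masking write data to the access size first.  Unknown types are passed to
 * the architecture-specific handler.
 */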
static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
            (req->size < sizeof (target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    if (req->dir == IOREQ_WRITE)
        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count, req->size);

    switch (req->type) {
        case IOREQ_TYPE_PIO:
            cpu_ioreq_pio(req);
            break;
        case IOREQ_TYPE_COPY:
            cpu_ioreq_move(req);
            break;
        case IOREQ_TYPE_TIMEOFFSET:
            break;
        case IOREQ_TYPE_INVALIDATE:
            xen_invalidate_map_cache();
            break;
        case IOREQ_TYPE_PCI_CONFIG:
            cpu_ioreq_config(state, req);
            break;
        default:
            arch_handle_ioreq(state, req);
    }
    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count, req->size);
    }
}

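/*
 * Drain the buffered ioreq ring shared with Xen, handling each entry as a
 * write request.  Returns the number of ring slots consumed (0 if the ring
 * is empty or not mapped).
 */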
static unsigned int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    unsigned int handled = 0;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    do {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        qatomic_add(&buf_page->read_pointer, qw + 1);
        handled += qw + 1;
    } while (handled < IOREQ_BUFFER_SLOT_NUM);

    return handled;
}

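/*
 * Timer callback: process buffered ioreqs and re-arm or cancel the timer
 * depending on how much work was found.
 */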
static void handle_buffered_io(void *opaque)
{
    unsigned int handled;
    XenIOState *state = opaque;

    handled = handle_buffered_iopage(state);
    if (handled >= IOREQ_BUFFER_SLOT_NUM) {
        /* We handled a full page of ioreqs. Schedule a timer to continue
         * processing while giving other stuff a chance to run.
         */
        timer_mod(state->buffered_io_timer,
                qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else if (handled == 0) {
        timer_del(state->buffered_io_timer);
        qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    } else {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }
}

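/*
 * Event channel handler: process any buffered ioreqs, then fetch and handle
 * the pending synchronous ioreq (if any), post the result back to the shared
 * page and notify the requesting vcpu's event channel.
 */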
static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        if (req->state != STATE_IOREQ_INPROCESS) {
            warn_report("Badness in I/O request ... not in service?!: "
                    "%x, ptr: %x, port: %"PRIx64", "
                    "data: %"PRIx64", count: %u, size: %u, type: %u",
                    req->state, req->data_is_ptr, req->addr,
                    req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            ShutdownCause request;

            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            request = qemu_reset_requested_get();
            if (request) {
                qemu_system_reset(request);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        qemu_xen_evtchn_notify(state->xce_handle,
                               state->ioreq_local_port[state->send_vcpu]);
    }
}

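/*
 * Set up the buffered-io timer, record the CPUState for each vcpu id and
 * hook the event channel fd into the main loop.
 */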
static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != NULL) {
        evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                                 state);

    if (evtchn_fd != -1) {
        CPUState *cpu_state;

        CPU_FOREACH(cpu_state) {
            trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
                                                 cpu_state);
            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
        }
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}


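/*
 * VM state change handler: prepare the ioreq main loop when the VM starts
 * running and propagate the run state to the ioreq server.
 */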
void xen_hvm_change_state_handler(void *opaque, bool running,
                                         RunState rstate)
{
    XenIOState *state = opaque;

    if (running) {
        xen_main_loop_prepare(state);
    }

    xen_set_ioreq_server_state(xen_domid,
                               state->ioservid,
                               running);
}

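/*
 * Exit notifier: tear down the ioreq server and release the mapped
 * resources, event channel handle and xenstore connection.
 */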
void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xen_destroy_ioreq_server(xen_domid, state->ioservid);
    if (state->fres != NULL) {
        xenforeignmemory_unmap_resource(xen_fmem, state->fres);
    }

    qemu_xen_evtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}

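/*
 * Map the shared and buffered ioreq pages, preferring the resource mapping
 * API and falling back to xenforeignmemory_map() of the individual frames.
 * Also records the buffered ioreq event channel.  Returns 0 on success.
 */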
static int xen_map_ioreq_server(XenIOState *state)
{
    void *addr = NULL;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    int rc;

    /*
     * Attempt to map using the resource API and fall back to normal
     * foreign mapping if this is not supported.
     */
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
    state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
                                         XENMEM_resource_ioreq_server,
                                         state->ioservid, 0, 2,
                                         &addr,
                                         PROT_READ | PROT_WRITE, 0);
    if (state->fres != NULL) {
        trace_xen_map_resource_ioreq(state->ioservid, addr);
        state->buffered_io_page = addr;
        state->shared_page = addr + XC_PAGE_SIZE;
    } else if (errno != EOPNOTSUPP) {
        error_report("failed to map ioreq server resources: error %d handle=%p",
                     errno, xen_xc);
        return -1;
    }

    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                   (state->shared_page == NULL) ?
                                   &ioreq_pfn : NULL,
                                   (state->buffered_io_page == NULL) ?
                                   &bufioreq_pfn : NULL,
                                   &bufioreq_evtchn);
    if (rc < 0) {
        error_report("failed to get ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        return rc;
    }

    if (state->shared_page == NULL) {
        trace_xen_map_ioreq_server_shared_page(ioreq_pfn);

        state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                  PROT_READ | PROT_WRITE,
                                                  1, &ioreq_pfn, NULL);
        if (state->shared_page == NULL) {
            error_report("map shared IO page returned error %d handle=%p",
                         errno, xen_xc);
        }
    }

    if (state->buffered_io_page == NULL) {
        trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);

        state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                       PROT_READ | PROT_WRITE,
                                                       1, &bufioreq_pfn,
                                                       NULL);
        if (state->buffered_io_page == NULL) {
            error_report("map buffered IO page returned error %d", errno);
            return -1;
        }
    }

    if (state->shared_page == NULL || state->buffered_io_page == NULL) {
        return -1;
    }

    trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);

    state->bufioreq_remote_port = bufioreq_evtchn;

    return 0;
}

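/*
 * Ask Xen to power off (or reboot) the domain, preferring the devicemodel
 * interface and falling back to xc_domain_shutdown() on older Xen.
 */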
void destroy_hvm_domain(bool reboot)
{
    xc_interface *xc_handle;
    int sts;
    int rc;

    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;

    if (xen_dmod) {
        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
        if (!rc) {
            return;
        }
        if (errno != ENOTTY /* old Xen */) {
            error_report("xendevicemodel_shutdown failed with error %d", errno);
        }
        /* well, try the old thing then */
    }

    xc_handle = xc_interface_open(0, 0, 0);
    if (xc_handle == NULL) {
        trace_destroy_hvm_domain_cannot_acquire_handle();
    } else {
        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
        if (sts != 0) {
            trace_destroy_hvm_domain_failed_action(
                reboot ? "reboot" : "poweroff", sts, strerror(errno)
            );
        } else {
            trace_destroy_hvm_domain_action(
                xen_domid, reboot ? "reboot" : "poweroff"
            );
        }
        xc_interface_close(xc_handle);
    }
}

void xen_shutdown_fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);
    error_report("Will destroy the domain.");
    /* destroy the domain */
    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
}

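/*
 * Complete ioreq server setup: map the ioreq pages, bind the per-vcpu and
 * buffered event channels, initialise the map cache and register the
 * memory, I/O and device listeners.  Any failure here is fatal.
 */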
static void xen_do_ioreq_register(XenIOState *state,
                                  unsigned int max_cpus,
                                  const MemoryListener *xen_memory_listener)
{
    int i, rc;

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    rc = xen_map_ioreq_server(state);
    if (rc < 0) {
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);

    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);

    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                              xen_vcpu_eport(state->shared_page,
                                                             i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                          state->bufioreq_remote_port);
    if (rc == -1) {
        error_report("buffered evtchn bind error %d", errno);
        goto err;
    }
    state->bufioreq_local_port = rc;

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = *xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}

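/*
 * Entry point for arch code: open the event channel and xenstore handles,
 * create an ioreq server and complete its setup, then initialise xen-bus.
 * A typical caller is expected to look roughly like (illustrative sketch,
 * not a verbatim quote of any arch file; the listener passed in is the
 * arch-specific one):
 *
 *     xen_register_ioreq(state, ms->smp.max_cpus, &xen_memory_listener);
 */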
void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
                        const MemoryListener *xen_memory_listener)
{
    int rc;

    setup_xen_backend_ops();

    state->xce_handle = qemu_xen_evtchn_open();
    if (state->xce_handle == NULL) {
        error_report("xen: event channel open failed with error %d", errno);
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        error_report("xen: xenstore open failed with error %d", errno);
        goto err;
    }

    rc = xen_create_ioreq_server(xen_domid, &state->ioservid);
    if (!rc) {
        xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
    } else {
        warn_report("xen: failed to create ioreq server");
    }

    xen_bus_init();

    return;

err:
    error_report("xen hardware virtual machine backend registration failed");
    exit(1);
}