xref: /openbmc/qemu/hw/xen/xen-hvm-common.c (revision 91e4e1832e3c3fb81673ab730253f7f68064b06f)
1 #include "qemu/osdep.h"
2 #include "qemu/units.h"
3 #include "qapi/error.h"
4 #include "exec/target_page.h"
5 #include "trace.h"
6 
7 #include "hw/pci/pci_host.h"
8 #include "hw/xen/xen-hvm-common.h"
9 #include "hw/xen/xen-bus.h"
10 #include "hw/boards.h"
11 #include "hw/xen/arch_hvm.h"
12 
13 MemoryRegion xen_memory, xen_grants;
14 
15 /* Check for any kind of xen memory, foreign mappings or grants.  */
bool xen_mr_is_memory(MemoryRegion *mr)
{
    /* Either of the two Xen regions defined in this file qualifies. */
    if (mr == &xen_grants) {
        return true;
    }
    return mr == &xen_memory;
}
20 
21 /* Check specifically for grants.  */
bool xen_mr_is_grants(MemoryRegion *mr)
{
    /* Pure identity test against the xen_grants region. */
    const MemoryRegion *grants = &xen_grants;

    return mr == grants;
}
26 
/*
 * Populate the guest physmap for a RAM block of @size bytes at @ram_addr.
 *
 * Nothing is populated when the run state is INMIGRATE (a warning is
 * printed instead) or when @mr is one of the Xen regions recognised by
 * xen_mr_is_memory().
 *
 * @ram_addr: start address of the block; shifted by the target page bits to
 *            obtain the first page frame number.
 * @size:     size of the block in bytes.
 * @mr:       memory region the block belongs to.
 * @errp:     set if xc_domain_populate_physmap_exact() reports failure.
 */
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned target_page_bits = qemu_target_page_bits();
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    /*
     * Same type as nr_pfn: an 'int' index would overflow (undefined
     * behaviour) before reaching a page count larger than INT_MAX.
     */
    unsigned long i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        warn_report("%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
                __func__, size, ram_addr);
        return;
    }

    if (xen_mr_is_memory(mr)) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> target_page_bits;
    pfn_list = g_new(xen_pfn_t, nr_pfn);

    /* One consecutive frame number per target page of the block. */
    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> target_page_bits) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}
63 
/*
 * Common worker for the memory listener add/del callbacks.
 *
 * Sections whose region is Xen memory (see xen_mr_is_memory()) are ignored
 * entirely.  Any other section is mapped to (@add == true) or unmapped from
 * (@add == false) the ioreq server, after which the architecture hook
 * arch_xen_set_memory() is invoked.
 */
static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (xen_mr_is_memory(section->mr)) {
        return;
    }

    if (add) {
        xen_map_memory_section(xen_domid, state->ioservid, section);
    } else {
        xen_unmap_memory_section(xen_domid, state->ioservid, section);
    }

    arch_xen_set_memory(state, section, add);
}
84 
/*
 * Memory listener callback for an added section: take a reference on the
 * section's region, then hand the section to xen_set_memory() with
 * add == true.
 */
void xen_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    MemoryRegion *region = section->mr;

    memory_region_ref(region);
    xen_set_memory(listener, section, true);
}
91 
/*
 * Memory listener callback for a removed section: hand the section to
 * xen_set_memory() with add == false, and only afterwards drop the
 * reference on the section's region.
 */
void xen_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    const bool add = false;

    xen_set_memory(listener, section, add);
    memory_region_unref(section->mr);
}
98 
/*
 * I/O listener callback for an added section.
 *
 * Sections whose region uses unassigned_io_ops are skipped.  For all others
 * a reference is taken on the region and xen_map_io_section() is called for
 * this ioreq server.
 */
void xen_io_add(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *region = section->mr;

    if (region->ops != &unassigned_io_ops) {
        memory_region_ref(region);
        xen_map_io_section(xen_domid, state->ioservid, section);
    }
}
113 
/*
 * I/O listener callback for a removed section.
 *
 * Sections whose region uses unassigned_io_ops are skipped.  For all others
 * xen_unmap_io_section() is called for this ioreq server and then the
 * reference on the region is dropped.
 */
void xen_io_del(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *region = section->mr;

    if (region->ops != &unassigned_io_ops) {
        xen_unmap_io_section(xen_domid, state->ioservid, section);
        memory_region_unref(region);
    }
}
128 
/*
 * Device listener callback for a realized device.
 *
 * Only PCI devices are of interest: a XenPciDevice entry holding the device
 * pointer and its bus/devfn value (built with PCI_BUILD_BDF) is inserted at
 * the head of state->dev_list, and xen_map_pcidev() is called for this
 * ioreq server.
 */
void xen_device_realize(DeviceListener *listener,
                               DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);
    PCIDevice *pci_dev;
    XenPciDevice *entry_dev;

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        return;
    }

    pci_dev = PCI_DEVICE(dev);
    entry_dev = g_new(XenPciDevice, 1);

    entry_dev->pci_dev = pci_dev;
    entry_dev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
                                    pci_dev->devfn);
    QLIST_INSERT_HEAD(&state->dev_list, entry_dev, entry);

    xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
}
146 
/*
 * Device listener callback for an unrealized device.
 *
 * For PCI devices, xen_unmap_pcidev() is called for this ioreq server and
 * the first state->dev_list entry referring to the device is unlinked and
 * freed.
 */
void xen_device_unrealize(DeviceListener *listener,
                                 DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);
    PCIDevice *pci_dev;
    XenPciDevice *cur, *tmp;

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        return;
    }

    pci_dev = PCI_DEVICE(dev);
    xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);

    QLIST_FOREACH_SAFE(cur, &state->dev_list, entry, tmp) {
        if (cur->pci_dev != pci_dev) {
            continue;
        }
        QLIST_REMOVE(cur, entry);
        g_free(cur);
        break;
    }
}
167 
168 MemoryListener xen_io_listener = {
169     .name = "xen-io",
170     .region_add = xen_io_add,
171     .region_del = xen_io_del,
172     .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
173 };
174 
175 DeviceListener xen_device_listener = {
176     .realize = xen_device_realize,
177     .unrealize = xen_device_unrealize,
178 };
179 
/* Get the ioreq packet for a vcpu from the shared memory page. */
cpu_get_ioreq_from_shared_memory(XenIOState * state,int vcpu)181 static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
182 {
183     ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
184 
185     if (req->state != STATE_IOREQ_READY) {
186         trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
187                                                              req->data_is_ptr,
188                                                              req->addr,
189                                                              req->data,
190                                                              req->count,
191                                                              req->size);
192         return NULL;
193     }
194 
195     xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
196 
197     req->state = STATE_IOREQ_INPROCESS;
198     return req;
199 }
200 
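/*
 * Check the event channel for a pending port notification.
 * Returns the ioreq of the vcpu whose port fired, or NULL when there is
 * nothing to handle (no pending port, a buffered-ioreq notification, or a
 * request that is not in the READY state).
 */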
cpu_get_ioreq(XenIOState * state)204 static ioreq_t *cpu_get_ioreq(XenIOState *state)
205 {
206     MachineState *ms = MACHINE(qdev_get_machine());
207     unsigned int max_cpus = ms->smp.max_cpus;
208     int i;
209     evtchn_port_t port;
210 
211     port = qemu_xen_evtchn_pending(state->xce_handle);
212     if (port == state->bufioreq_local_port) {
213         timer_mod(state->buffered_io_timer,
214                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
215         return NULL;
216     }
217 
218     if (port != -1) {
219         for (i = 0; i < max_cpus; i++) {
220             if (state->ioreq_local_port[i] == port) {
221                 break;
222             }
223         }
224 
225         if (i == max_cpus) {
226             hw_error("Fatal error while trying to get io event!\n");
227         }
228 
229         /* unmask the wanted port again */
230         qemu_xen_evtchn_unmask(state->xce_handle, port);
231 
232         /* get the io packet from shared memory */
233         state->send_vcpu = i;
234         return cpu_get_ioreq_from_shared_memory(state, i);
235     }
236 
237     /* read error or read nothing */
238     return NULL;
239 }
240 
/*
 * Perform a port read of @size bytes (1, 2 or 4) at @addr through
 * cpu_inb()/cpu_inw()/cpu_inl() and return the value.  Any other size is
 * reported through hw_error().
 */
static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    if (size == 1) {
        return cpu_inb(addr);
    }
    if (size == 2) {
        return cpu_inw(addr);
    }
    if (size == 4) {
        return cpu_inl(addr);
    }
    hw_error("inp: bad size: %04x %lx", addr, size);
}
254 
/*
 * Perform a port write of @val with a width of @size bytes (1, 2 or 4) at
 * @addr through cpu_outb()/cpu_outw()/cpu_outl().  Any other size is
 * reported through hw_error().
 */
static void do_outp(uint32_t addr,
        unsigned long size, uint32_t val)
{
    /*
     * Plain calls followed by break: ISO C does not permit
     * "return <expression>;" in a function returning void.
     */
    switch (size) {
    case 1:
        cpu_outb(addr, val);
        break;
    case 2:
        cpu_outw(addr, val);
        break;
    case 4:
        cpu_outl(addr, val);
        break;
    default:
        hw_error("outp: bad size: %04x %lx", addr, size);
    }
}
269 
270 /*
271  * Helper functions which read/write an object from/to physical guest
272  * memory, as part of the implementation of an ioreq.
273  *
274  * Equivalent to
275  *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
276  *                          val, req->size, 0/1)
277  * except without the integer overflow problems.
278  */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /*
     * Item @i lives req->size * i bytes away from @addr: below it when
     * req->df is set, above it otherwise.  The arithmetic is unsigned, so
     * an overflow merely yields a truncated address and an access to some
     * other part of guest memory, which is up to the guest.
     */
    hwaddr distance = (hwaddr)req->size * i;
    hwaddr target = req->df ? addr - distance : addr + distance;

    cpu_physical_memory_rw(target, val, req->size, rw);
}
293 
/* Wrapper around rw_phys_req_item() with the rw argument fixed to 0. */
static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    const int rw = 0;

    rw_phys_req_item(addr, req, i, val, rw);
}
/* Wrapper around rw_phys_req_item() with the rw argument fixed to 1. */
static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    const int rw = 1;

    rw_phys_req_item(addr, req, i, val, rw);
}
304 
305 
/*
 * Handle a port I/O request.
 *
 * req->size must not exceed sizeof(uint32_t).  When req->data_is_ptr is
 * clear, a single access is done with req->data as the value (filled in on
 * reads).  When it is set, req->data is a guest physical address and
 * req->count items are transferred between that memory and the port.
 */
void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t n;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (req->data_is_ptr) {
            /* Port -> guest memory, one item per iteration. */
            uint32_t val;

            for (n = 0; n < req->count; n++) {
                val = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, n, &val);
            }
        } else {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        }
        return;
    }

    if (req->dir != IOREQ_WRITE) {
        return;
    }

    if (req->data_is_ptr) {
        /* Guest memory -> port, one item per iteration. */
        for (n = 0; n < req->count; n++) {
            uint32_t val = 0;

            read_phys_req_item(req->data, req, n, &val);
            do_outp(req->addr, req->size, val);
        }
    } else {
        trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                      req->size);
        do_outp(req->addr, req->size, req->data);
    }
}
345 
/*
 * Handle a copy request between guest physical memory at req->addr and
 * either req->data itself (req->data_is_ptr clear) or guest physical memory
 * at the address held in req->data (req->data_is_ptr set).  req->count items
 * of req->size bytes are processed; req->size must fit in req->data.
 */
static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t n;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        /* The value travels in req->data directly. */
        if (req->dir == IOREQ_READ) {
            for (n = 0; n < req->count; n++) {
                read_phys_req_item(req->addr, req, n, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (n = 0; n < req->count; n++) {
                write_phys_req_item(req->addr, req, n, &req->data);
            }
        }
        return;
    }

    /* Memory-to-memory: bounce each item through a local buffer. */
    if (req->dir == IOREQ_READ) {
        for (n = 0; n < req->count; n++) {
            uint64_t bounce;

            read_phys_req_item(req->addr, req, n, &bounce);
            write_phys_req_item(req->data, req, n, &bounce);
        }
    } else if (req->dir == IOREQ_WRITE) {
        for (n = 0; n < req->count; n++) {
            uint64_t bounce;

            read_phys_req_item(req->data, req, n, &bounce);
            write_phys_req_item(req->addr, req, n, &bounce);
        }
    }
}
383 
/*
 * Handle a PCI config space request.
 *
 * The upper 32 bits of req->addr select the device (compared against the
 * sbdf stored in state->dev_list), the lower 32 bits are the register
 * offset.  Only sizes of 1, 2 or 4 bytes and a count of exactly 1 are
 * accepted; anything else goes to hw_error().  Every list entry with a
 * matching sbdf is accessed through pci_host_config_{read,write}_common().
 * When req->data_is_ptr is set the value is transferred via guest physical
 * memory at req->data instead of req->data itself.
 */
static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
{
    uint32_t sbdf = req->addr >> 32;
    uint32_t reg = req->addr;
    XenPciDevice *xendev;

    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
        req->size != sizeof(uint32_t)) {
        hw_error("PCI config access: bad size (%u)", req->size);
    }

    if (req->count != 1) {
        hw_error("PCI config access: bad count (%u)", req->count);
    }

    QLIST_FOREACH(xendev, &state->dev_list, entry) {
        if (xendev->sbdf != sbdf) {
            continue;
        }

        if (!req->data_is_ptr) {
            if (req->dir == IOREQ_READ) {
                req->data = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, req->data);
            } else if (req->dir == IOREQ_WRITE) {
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, req->data);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->data, req->size);
            }
        } else {
            /*
             * Zero-initialised because the guest-memory read below fills
             * only req->size bytes; without it the remaining bytes of a
             * 1- or 2-byte write would be indeterminate when traced and
             * passed on (cpu_ioreq_pio() does the same).
             */
            uint32_t tmp = 0;

            if (req->dir == IOREQ_READ) {
                tmp = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, tmp);
                write_phys_req_item(req->data, req, 0, &tmp);
            } else if (req->dir == IOREQ_WRITE) {
                read_phys_req_item(req->data, req, 0, &tmp);
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, tmp);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    tmp, req->size);
            }
        }
    }
}
439 
/*
 * Dispatch one ioreq according to req->type.
 *
 * For writes that carry their value inline and are narrower than
 * target_ulong, req->data is first masked down to req->size bytes.  Types
 * not handled here are forwarded to arch_handle_ioreq().
 */
static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr && req->size < sizeof(target_ulong)) {
            req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
        }

        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count, req->size);
    }

    switch (req->type) {
    case IOREQ_TYPE_PIO:
        cpu_ioreq_pio(req);
        break;
    case IOREQ_TYPE_COPY:
        cpu_ioreq_move(req);
        break;
    case IOREQ_TYPE_TIMEOFFSET:
        /* Nothing to do for this type. */
        break;
    case IOREQ_TYPE_INVALIDATE:
        xen_invalidate_map_cache();
        break;
    case IOREQ_TYPE_PCI_CONFIG:
        cpu_ioreq_config(state, req);
        break;
    default:
        arch_handle_ioreq(state, req);
        break;
    }

    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count, req->size);
    }
}
477 
/*
 * Drain pending entries from the buffered ioreq page.
 *
 * Each entry is turned into a single-count write ioreq and passed to
 * handle_ioreq().  An entry whose decoded size is 8 occupies two
 * consecutive slots; the second one supplies the upper 32 bits of the data.
 * The loop ends when the ring is empty or once at least
 * IOREQ_BUFFER_SLOT_NUM slots have been consumed.
 *
 * Returns the number of slots consumed (0 if there is no buffered page).
 */
static unsigned int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *page = state->buffered_io_page;
    unsigned int handled = 0;
    ioreq_t req;

    if (page == NULL) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    do {
        buf_ioreq_t *slot;
        uint32_t rdptr = page->read_pointer;
        uint32_t wrptr;
        int qw;

        xen_rmb();
        wrptr = page->write_pointer;
        xen_rmb();
        /* read_pointer moved while sampling the pair: sample again. */
        if (rdptr != page->read_pointer) {
            continue;
        }
        /* Ring is empty. */
        if (rdptr == wrptr) {
            break;
        }

        slot = &page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << slot->size;
        req.addr = slot->addr;
        req.data = slot->data;
        req.type = slot->type;
        xen_rmb();

        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            slot = &page->buf_ioreq[(rdptr + 1) % IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)slot->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        qatomic_add(&page->read_pointer, qw + 1);
        handled += qw + 1;
    } while (handled < IOREQ_BUFFER_SLOT_NUM);

    return handled;
}
541 
handle_buffered_io(void * opaque)542 static void handle_buffered_io(void *opaque)
543 {
544     unsigned int handled;
545     XenIOState *state = opaque;
546 
547     handled = handle_buffered_iopage(state);
548     if (handled >= IOREQ_BUFFER_SLOT_NUM) {
549         /* We handled a full page of ioreqs. Schedule a timer to continue
550          * processing while giving other stuff a chance to run.
551          */
552         timer_mod(state->buffered_io_timer,
553                 qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
554     } else if (handled == 0) {
555         timer_del(state->buffered_io_timer);
556         qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
557     } else {
558         timer_mod(state->buffered_io_timer,
559                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
560     }
561 }
562 
cpu_handle_ioreq(void * opaque)563 static void cpu_handle_ioreq(void *opaque)
564 {
565     XenIOState *state = opaque;
566     ioreq_t *req = cpu_get_ioreq(state);
567 
568     handle_buffered_iopage(state);
569     if (req) {
570         ioreq_t copy = *req;
571 
572         xen_rmb();
573         handle_ioreq(state, &copy);
574         req->data = copy.data;
575 
576         if (req->state != STATE_IOREQ_INPROCESS) {
577             warn_report("Badness in I/O request ... not in service?!: "
578                     "%x, ptr: %x, port: %"PRIx64", "
579                     "data: %"PRIx64", count: %u, size: %u, type: %u",
580                     req->state, req->data_is_ptr, req->addr,
581                     req->data, req->count, req->size, req->type);
582             destroy_hvm_domain(false);
583             return;
584         }
585 
586         xen_wmb(); /* Update ioreq contents /then/ update state. */
587 
588         /*
589          * We do this before we send the response so that the tools
590          * have the opportunity to pick up on the reset before the
591          * guest resumes and does a hlt with interrupts disabled which
592          * causes Xen to powerdown the domain.
593          */
594         if (runstate_is_running()) {
595             ShutdownCause request;
596 
597             if (qemu_shutdown_requested_get()) {
598                 destroy_hvm_domain(false);
599             }
600             request = qemu_reset_requested_get();
601             if (request) {
602                 qemu_system_reset(request);
603                 destroy_hvm_domain(true);
604             }
605         }
606 
607         req->state = STATE_IORESP_READY;
608         qemu_xen_evtchn_notify(state->xce_handle,
609                                state->ioreq_local_port[state->send_vcpu]);
610     }
611 }
612 
/*
 * Set up ioreq processing: create the buffered-io timer and, when an event
 * channel fd is available, fill state->cpu_by_vcpu_id from the CPU list and
 * install cpu_handle_ioreq() as the fd's read handler.
 *
 * NOTE(review): a new timer is allocated on every call, and
 * xen_hvm_change_state_handler() calls this each time the VM enters the
 * running state; the previous timer pointer is overwritten here - confirm
 * whether that is intended.
 */
static void xen_main_loop_prepare(XenIOState *state)
{
    CPUState *cpu;
    int fd = -1;

    if (state->xce_handle != NULL) {
        fd = qemu_xen_evtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                            handle_buffered_io, state);

    if (fd == -1) {
        return;
    }

    CPU_FOREACH(cpu) {
        trace_xen_main_loop_prepare_init_cpu(cpu->cpu_index, cpu);
        state->cpu_by_vcpu_id[cpu->cpu_index] = cpu;
    }
    qemu_set_fd_handler(fd, cpu_handle_ioreq, NULL, state);
}
635 
636 
/*
 * VM run-state change callback.  When the VM starts running the main loop
 * hooks are (re)prepared; in all cases the ioreq server state is set to
 * @running.  @rstate is not used.
 */
void xen_hvm_change_state_handler(void *opaque, bool running,
                                         RunState rstate)
{
    XenIOState *io_state = opaque;

    if (running) {
        xen_main_loop_prepare(io_state);
    }

    xen_set_ioreq_server_state(xen_domid, io_state->ioservid, running);
}
650 
/*
 * Exit notifier: destroy the ioreq server, unmap the ioreq resource if one
 * was mapped (state->fres), then close the event channel handle and the
 * xenstore connection.  @data is not used.
 */
void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *io_state = container_of(n, XenIOState, exit);

    xen_destroy_ioreq_server(xen_domid, io_state->ioservid);

    if (io_state->fres) {
        xenforeignmemory_unmap_resource(xen_fmem, io_state->fres);
    }

    qemu_xen_evtchn_close(io_state->xce_handle);
    xs_daemon_close(io_state->xenstore);
}
663 
/*
 * Map the ioreq server's shared page and, when state->has_bufioreq is set,
 * its buffered ioreq page, and record the buffered-ioreq event channel.
 *
 * On success state->shared_page (and, with bufioreq, state->buffered_io_page
 * and state->bufioreq_remote_port) are set and 0 is returned.  A negative
 * value is returned on failure.
 */
static int xen_map_ioreq_server(XenIOState *state)
{
    void *addr = NULL;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    unsigned long first_frame;
    unsigned long nr_frames;
    int rc;

    /*
     * Attempt to map using the resource API and fall back to normal
     * foreign mapping if this is not supported.
     */
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);

    /* Frame 0 is the bufioreq page, frame 1 the ioreq page (see above). */
    if (state->has_bufioreq) {
        first_frame = 0;
        nr_frames = 2;
    } else {
        first_frame = 1;
        nr_frames = 1;
    }

    state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
                                                XENMEM_resource_ioreq_server,
                                                state->ioservid,
                                                first_frame, nr_frames,
                                                &addr,
                                                PROT_READ | PROT_WRITE, 0);
    if (state->fres == NULL) {
        if (errno != EOPNOTSUPP) {
            error_report("failed to map ioreq server resources: error %d handle=%p",
                         errno, xen_xc);
            return -1;
        }
    } else {
        trace_xen_map_resource_ioreq(state->ioservid, addr);
        if (state->has_bufioreq) {
            state->buffered_io_page = addr;
            state->shared_page = addr + XC_PAGE_SIZE;
        } else {
            state->shared_page = addr;
        }
    }

    /*
     * If we fail to map the shared page with xenforeignmemory_map_resource()
     * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info()
     * to provide the addresses to map the shared page and/or to get the
     * event-channel port for buffered ioreqs.
     */
    if (state->shared_page == NULL || state->has_bufioreq) {
        bool need_ioreq_pfn = (state->shared_page == NULL);
        bool need_bufioreq_pfn = (state->has_bufioreq &&
                                  state->buffered_io_page == NULL);

        rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                       need_ioreq_pfn ? &ioreq_pfn : NULL,
                                       need_bufioreq_pfn ?
                                       &bufioreq_pfn : NULL,
                                       &bufioreq_evtchn);
        if (rc < 0) {
            error_report("failed to get ioreq server info: error %d handle=%p",
                         errno, xen_xc);
            return rc;
        }

        if (need_ioreq_pfn) {
            trace_xen_map_ioreq_server_shared_page(ioreq_pfn);

            state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                      PROT_READ | PROT_WRITE,
                                                      1, &ioreq_pfn, NULL);
        }
        /* Reported here; the -1 is returned by the final check below. */
        if (state->shared_page == NULL) {
            error_report("map shared IO page returned error %d handle=%p",
                         errno, xen_xc);
        }

        if (need_bufioreq_pfn) {
            trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);

            state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                        PROT_READ | PROT_WRITE,
                                                        1, &bufioreq_pfn,
                                                        NULL);
            if (state->buffered_io_page == NULL) {
                error_report("map buffered IO page returned error %d", errno);
                return -1;
            }
        }
    }

    if (state->shared_page == NULL) {
        return -1;
    }
    if (state->has_bufioreq && state->buffered_io_page == NULL) {
        return -1;
    }

    if (state->has_bufioreq) {
        trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
        state->bufioreq_remote_port = bufioreq_evtchn;
    }

    return 0;
}
762 
destroy_hvm_domain(bool reboot)763 void destroy_hvm_domain(bool reboot)
764 {
765     xc_interface *xc_handle;
766     int sts;
767     int rc;
768 
769     unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
770 
771     if (xen_dmod) {
772         rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
773         if (!rc) {
774             return;
775         }
776         if (errno != ENOTTY /* old Xen */) {
777             error_report("xendevicemodel_shutdown failed with error %d", errno);
778         }
779         /* well, try the old thing then */
780     }
781 
782     xc_handle = xc_interface_open(0, 0, 0);
783     if (xc_handle == NULL) {
784         trace_destroy_hvm_domain_cannot_acquire_handle();
785     } else {
786         sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
787         if (sts != 0) {
788             trace_destroy_hvm_domain_failed_action(
789                 reboot ? "reboot" : "poweroff", sts, strerror(errno)
790             );
791         } else {
792             trace_destroy_hvm_domain_action(
793                 xen_domid, reboot ? "reboot" : "poweroff"
794             );
795         }
796         xc_interface_close(xc_handle);
797     }
798 }
799 
xen_shutdown_fatal_error(const char * fmt,...)800 void xen_shutdown_fatal_error(const char *fmt, ...)
801 {
802     va_list ap;
803 
804     va_start(ap, fmt);
805     error_vreport(fmt, ap);
806     va_end(ap);
807     error_report("Will destroy the domain.");
808     /* destroy the domain */
809     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
810 }
811 
/*
 * Complete ioreq server registration for @state.
 *
 * Registers the exit notifier and wakeup support, maps the ioreq pages,
 * enables the ioreq server, binds one event channel per vcpu (plus the
 * buffered-ioreq channel when enabled), initialises the map cache, and
 * registers the VM state handler and the memory, I/O and device listeners.
 *
 * @max_cpus:            number of vcpu slots to allocate and bind.
 * @xen_memory_listener: template copied into state->memory_listener.
 *
 * Any failure is fatal: an error is reported and the process exits.
 */
static void xen_do_ioreq_register(XenIOState *state,
                                  unsigned int max_cpus,
                                  const MemoryListener *xen_memory_listener)
{
    int vcpu;
    int port;

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    if (xen_map_ioreq_server(state) < 0) {
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);

    if (xen_set_ioreq_server_state(xen_domid, state->ioservid, true) < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);

    /* FIXME: how about if we overflow the page here? */
    for (vcpu = 0; vcpu < max_cpus; vcpu++) {
        evtchn_port_t remote = xen_vcpu_eport(state->shared_page, vcpu);

        port = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                                remote);
        if (port == -1) {
            error_report("shared evtchn %d bind error %d", vcpu, errno);
            goto err;
        }
        state->ioreq_local_port[vcpu] = port;
    }

    if (state->has_bufioreq) {
        port = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                                state->bufioreq_remote_port);
        if (port == -1) {
            error_report("buffered evtchn bind error %d", errno);
            goto err;
        }
        state->bufioreq_local_port = port;
    }

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    /* Each listener is a private copy of its template. */
    state->memory_listener = *xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}
889 
/*
 * Entry point for ioreq registration.
 *
 * Opens the event channel and xenstore handles (either failure is fatal),
 * creates the ioreq server and, if that succeeds, completes registration
 * via xen_do_ioreq_register().  A failure to create the ioreq server only
 * produces a warning.  xen_bus_init() is called in both cases.
 *
 * @handle_bufioreq: passed to xen_create_ioreq_server(); buffered ioreqs
 *                   are considered enabled unless it equals
 *                   HVM_IOREQSRV_BUFIOREQ_OFF.
 */
void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
                        uint8_t handle_bufioreq,
                        const MemoryListener *xen_memory_listener)
{
    setup_xen_backend_ops();

    state->xce_handle = qemu_xen_evtchn_open();
    if (state->xce_handle == NULL) {
        error_report("xen: event channel open failed with error %d", errno);
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        error_report("xen: xenstore open failed with error %d", errno);
        goto err;
    }

    state->has_bufioreq = handle_bufioreq != HVM_IOREQSRV_BUFIOREQ_OFF;

    if (xen_create_ioreq_server(xen_domid, handle_bufioreq,
                                &state->ioservid)) {
        warn_report("xen: failed to create ioreq server");
    } else {
        xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
    }

    xen_bus_init();

    return;

err:
    error_report("xen hardware virtual machine backend registration failed");
    exit(1);
}
926