xref: /openbmc/qemu/hw/i386/xen/xen_platform.c (revision 372b69f5)
1 /*
2  * XEN platform pci device, formerly known as the event channel device
3  *
4  * Copyright (c) 2003-2004 Intel Corp.
5  * Copyright (c) 2006 XenSource
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qapi/error.h"
28 #include "hw/ide/pci.h"
29 #include "hw/pci/pci.h"
30 #include "migration/vmstate.h"
31 #include "net/net.h"
32 #include "trace.h"
33 #include "sysemu/xen.h"
34 #include "sysemu/block-backend.h"
35 #include "qemu/error-report.h"
36 #include "qemu/module.h"
37 #include "qom/object.h"
38 
39 #ifdef CONFIG_XEN
40 #include "hw/xen/xen_native.h"
41 #endif
42 
43 /* The rule is that xen_native.h must come first */
44 #include "hw/xen/xen.h"
45 
/* Define DEBUG_PLATFORM to get this device's debug messages on stderr. */
/* #define DEBUG_PLATFORM */

/*
 * DPRINTF(): printf-style debug helper.
 *
 * With DEBUG_PLATFORM defined, the message is prefixed with "xen_platform: "
 * and written to stderr.  Otherwise the macro expands to an empty statement,
 * so its arguments are neither evaluated nor format-checked.
 */
#ifndef DEBUG_PLATFORM
#define DPRINTF(fmt, ...) do { } while (0)
#else
#define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \
} while (0)
#endif

/* Platform flag bit: selects whether the ROM memory area is RO (1) or RW (0) */
#define PFFLAG_ROM_LOCK 1
57 
58 struct PCIXenPlatformState {
59     /*< private >*/
60     PCIDevice parent_obj;
61     /*< public >*/
62 
63     MemoryRegion fixed_io;
64     MemoryRegion bar;
65     MemoryRegion mmio_bar;
66     uint8_t flags; /* used only for version_id == 2 */
67     uint16_t driver_product_version;
68 
69     /* Log from guest drivers */
70     char log_buffer[4096];
71     int log_buffer_off;
72 };
73 
74 #define TYPE_XEN_PLATFORM "xen-platform"
75 OBJECT_DECLARE_SIMPLE_TYPE(PCIXenPlatformState, XEN_PLATFORM)
76 
77 #define XEN_PLATFORM_IOPORT 0x10
78 
79 /* Send bytes to syslog */
80 static void log_writeb(PCIXenPlatformState *s, char val)
81 {
82     if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) {
83         /* Flush buffer */
84         s->log_buffer[s->log_buffer_off] = 0;
85         trace_xen_platform_log(s->log_buffer);
86         s->log_buffer_off = 0;
87     } else {
88         s->log_buffer[s->log_buffer_off++] = val;
89     }
90 }
91 
/*
 * Unplug device flags.
 *
 * The logic got a little confused at some point in the past but this is
 * what they do now.
 *
 * bit 0: Unplug all IDE and SCSI disks.
 * bit 1: Unplug all NICs.
 * bit 2: Unplug IDE disks except primary master. This is overridden if
 *        bit 0 is also present in the mask.
 * bit 3: Unplug all NVMe disks.
 *
 * The bit-position macros carry a _BIT suffix rather than a leading
 * underscore: identifiers that begin with an underscore followed by an
 * uppercase letter are reserved for the implementation.
 */
#define UNPLUG_IDE_SCSI_DISKS_BIT 0
#define UNPLUG_IDE_SCSI_DISKS (1u << UNPLUG_IDE_SCSI_DISKS_BIT)

#define UNPLUG_ALL_NICS_BIT 1
#define UNPLUG_ALL_NICS (1u << UNPLUG_ALL_NICS_BIT)

#define UNPLUG_AUX_IDE_DISKS_BIT 2
#define UNPLUG_AUX_IDE_DISKS (1u << UNPLUG_AUX_IDE_DISKS_BIT)

#define UNPLUG_NVME_DISKS_BIT 3
#define UNPLUG_NVME_DISKS (1u << UNPLUG_NVME_DISKS_BIT)
116 
117 static bool pci_device_is_passthrough(PCIDevice *d)
118 {
119     if (!strcmp(d->name, "xen-pci-passthrough")) {
120         return true;
121     }
122 
123     if (xen_mode == XEN_EMULATE && !strcmp(d->name, "vfio-pci")) {
124         return true;
125     }
126 
127     return false;
128 }
129 
130 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
131 {
132     /* We have to ignore passthrough devices */
133     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
134             PCI_CLASS_NETWORK_ETHERNET
135             && !pci_device_is_passthrough(d)) {
136         object_unparent(OBJECT(d));
137     }
138 }
139 
140 /* Remove the peer of the NIC device. Normally, this would be a tap device. */
141 static void del_nic_peer(NICState *nic, void *opaque)
142 {
143     NetClientState *nc = qemu_get_queue(nic);
144     ObjectClass *klass = module_object_class_by_name(nc->model);
145 
146     /* Only delete peers of PCI NICs that we're about to delete */
147     if (!klass || !object_class_dynamic_cast(klass, TYPE_PCI_DEVICE)) {
148         return;
149     }
150 
151     if (nc->peer)
152         qemu_del_net_client(nc->peer);
153 }
154 
155 static void pci_unplug_nics(PCIBus *bus)
156 {
157     qemu_foreach_nic(del_nic_peer, NULL);
158     pci_for_each_device(bus, 0, unplug_nic, NULL);
159 }
160 
/*
 * Bookkeeping for one IDE unplug walk.
 *
 * The Xen HVM unplug protocol [1] lets a guest ask for the 'aux' disks to
 * be unplugged; that is stated to mean every IDE disk other than the
 * primary master.
 *
 * NOTE: The protocol does not make clear what should happen when unplug of
 *       all disks and of the 'aux' disks is requested at the same time.
 *       The implementation assumes that an 'all' request overrides an
 *       'aux' request.
 *
 * [1] https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
 */
struct ide_unplug_state {
    bool aux;          /* true: keep the primary master plugged in */
    int nr_unplugged;  /* number of drives removed during the walk */
};
176 
177 static int ide_dev_unplug(DeviceState *dev, void *_st)
178 {
179     struct ide_unplug_state *st = _st;
180     IDEDevice *idedev;
181     IDEBus *idebus;
182     BlockBackend *blk;
183     int unit;
184 
185     idedev = IDE_DEVICE(object_dynamic_cast(OBJECT(dev), "ide-hd"));
186     if (!idedev) {
187         return 0;
188     }
189 
190     idebus = IDE_BUS(qdev_get_parent_bus(dev));
191 
192     unit = (idedev == idebus->slave);
193     assert(unit || idedev == idebus->master);
194 
195     if (st->aux && !unit && !strcmp(BUS(idebus)->name, "ide.0")) {
196         return 0;
197     }
198 
199     blk = idebus->ifs[unit].blk;
200     if (blk) {
201         blk_drain(blk);
202         blk_flush(blk);
203 
204         blk_detach_dev(blk, DEVICE(idedev));
205         idebus->ifs[unit].blk = NULL;
206         idedev->conf.blk = NULL;
207         monitor_remove_blk(blk);
208         blk_unref(blk);
209     }
210 
211     object_unparent(OBJECT(dev));
212     st->nr_unplugged++;
213 
214     return 0;
215 }
216 
217 static void pci_xen_ide_unplug(PCIDevice *d, bool aux)
218 {
219     struct ide_unplug_state st = { aux, 0 };
220     DeviceState *dev = DEVICE(d);
221 
222     qdev_walk_children(dev, NULL, NULL, ide_dev_unplug, NULL, &st);
223     if (st.nr_unplugged) {
224         pci_device_reset(d);
225     }
226 }
227 
228 static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
229 {
230     uint32_t flags = *(uint32_t *)opaque;
231     bool aux = (flags & UNPLUG_AUX_IDE_DISKS) &&
232         !(flags & UNPLUG_IDE_SCSI_DISKS);
233 
234     /* We have to ignore passthrough devices */
235     if (pci_device_is_passthrough(d))
236         return;
237 
238     switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
239     case PCI_CLASS_STORAGE_IDE:
240     case PCI_CLASS_STORAGE_SATA:
241         pci_xen_ide_unplug(d, aux);
242         break;
243 
244     case PCI_CLASS_STORAGE_SCSI:
245         if (!aux) {
246             object_unparent(OBJECT(d));
247         }
248         break;
249 
250     case PCI_CLASS_STORAGE_EXPRESS:
251         if (flags & UNPLUG_NVME_DISKS) {
252             object_unparent(OBJECT(d));
253         }
254 
255     default:
256         break;
257     }
258 }
259 
260 static void pci_unplug_disks(PCIBus *bus, uint32_t flags)
261 {
262     pci_for_each_device(bus, 0, unplug_disks, &flags);
263 }
264 
265 static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
266 {
267     PCIXenPlatformState *s = opaque;
268 
269     switch (addr) {
270     case 0: {
271         PCIDevice *pci_dev = PCI_DEVICE(s);
272         /* Unplug devices. See comment above flag definitions */
273         if (val & (UNPLUG_IDE_SCSI_DISKS | UNPLUG_AUX_IDE_DISKS |
274                    UNPLUG_NVME_DISKS)) {
275             DPRINTF("unplug disks\n");
276             pci_unplug_disks(pci_get_bus(pci_dev), val);
277         }
278         if (val & UNPLUG_ALL_NICS) {
279             DPRINTF("unplug nics\n");
280             pci_unplug_nics(pci_get_bus(pci_dev));
281         }
282         break;
283     }
284     case 2:
285         switch (val) {
286         case 1:
287             DPRINTF("Citrix Windows PV drivers loaded in guest\n");
288             break;
289         case 0:
290             DPRINTF("Guest claimed to be running PV product 0?\n");
291             break;
292         default:
293             DPRINTF("Unknown PV product %d loaded in guest\n", val);
294             break;
295         }
296         s->driver_product_version = val;
297         break;
298     }
299 }
300 
/*
 * 32-bit write to the fixed I/O port window.
 *
 * Offset 0 carries the PV driver version; the value is accepted and
 * discarded.  Writes to every other offset are ignored as well, so this
 * handler has no effect on device state.
 */
static void platform_fixed_ioport_writel(void *opaque, uint32_t addr,
                                         uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
310 
311 static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
312 {
313     PCIXenPlatformState *s = opaque;
314 
315     switch (addr) {
316     case 0: /* Platform flags */
317         if (xen_mode == XEN_EMULATE) {
318             /* XX: Use i440gx/q35 PAM setup to do this? */
319             s->flags = val & PFFLAG_ROM_LOCK;
320 #ifdef CONFIG_XEN
321         } else {
322             hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ?
323                 HVMMEM_ram_ro : HVMMEM_ram_rw;
324 
325             if (xen_set_mem_type(xen_domid, mem_type, 0xc0, 0x40)) {
326                 DPRINTF("unable to change ro/rw state of ROM memory area!\n");
327             } else {
328                 s->flags = val & PFFLAG_ROM_LOCK;
329                 DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n",
330                         (mem_type == HVMMEM_ram_ro ? "ro" : "rw"));
331             }
332 #endif
333         }
334         break;
335 
336     case 2:
337         log_writeb(s, val);
338         break;
339     }
340 }
341 
/*
 * 16-bit read from the fixed I/O port window.
 *
 * Offset 0 yields the magic value 0x49d2, which lets the guest identify
 * the interface; every other offset reads as 0xffff.  @opaque is unused.
 */
static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr)
{
    if (addr == 0) {
        /* Magic value so that you can identify the interface. */
        return 0x49d2;
    }

    return 0xffff;
}
352 
353 static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr)
354 {
355     PCIXenPlatformState *s = opaque;
356 
357     switch (addr) {
358     case 0:
359         /* Platform flags */
360         return s->flags;
361     case 2:
362         /* Version number */
363         return 1;
364     default:
365         return 0xff;
366     }
367 }
368 
/*
 * Reset the fixed I/O port state by writing 0 to the platform flags
 * register (offset 0), exactly as a guest byte write would.
 */
static void platform_fixed_ioport_reset(void *opaque)
{
    platform_fixed_ioport_writeb(opaque, 0, 0);
}
375 
376 static uint64_t platform_fixed_ioport_read(void *opaque,
377                                            hwaddr addr,
378                                            unsigned size)
379 {
380     switch (size) {
381     case 1:
382         return platform_fixed_ioport_readb(opaque, addr);
383     case 2:
384         return platform_fixed_ioport_readw(opaque, addr);
385     default:
386         return -1;
387     }
388 }
389 
390 static void platform_fixed_ioport_write(void *opaque, hwaddr addr,
391 
392                                         uint64_t val, unsigned size)
393 {
394     switch (size) {
395     case 1:
396         platform_fixed_ioport_writeb(opaque, addr, val);
397         break;
398     case 2:
399         platform_fixed_ioport_writew(opaque, addr, val);
400         break;
401     case 4:
402         platform_fixed_ioport_writel(opaque, addr, val);
403         break;
404     }
405 }
406 
407 
408 static const MemoryRegionOps platform_fixed_io_ops = {
409     .read = platform_fixed_ioport_read,
410     .write = platform_fixed_ioport_write,
411     .valid = {
412         .unaligned = true,
413     },
414     .impl = {
415         .min_access_size = 1,
416         .max_access_size = 4,
417         .unaligned = true,
418     },
419     .endianness = DEVICE_LITTLE_ENDIAN,
420 };
421 
422 static void platform_fixed_ioport_init(PCIXenPlatformState* s)
423 {
424     memory_region_init_io(&s->fixed_io, OBJECT(s), &platform_fixed_io_ops, s,
425                           "xen-fixed", 16);
426     memory_region_add_subregion(get_system_io(), XEN_PLATFORM_IOPORT,
427                                 &s->fixed_io);
428 }
429 
430 /* Xen Platform PCI Device */
431 
432 static uint64_t xen_platform_ioport_readb(void *opaque, hwaddr addr,
433                                           unsigned int size)
434 {
435     if (addr == 0) {
436         return platform_fixed_ioport_readb(opaque, 0);
437     } else {
438         return ~0u;
439     }
440 }
441 
442 static void xen_platform_ioport_writeb(void *opaque, hwaddr addr,
443                                        uint64_t val, unsigned int size)
444 {
445     PCIXenPlatformState *s = opaque;
446     PCIDevice *pci_dev = PCI_DEVICE(s);
447 
448     switch (addr) {
449     case 0: /* Platform flags */
450         platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val);
451         break;
452     case 4:
453         if (val == 1) {
454             /*
455              * SUSE unplug for Xenlinux
456              * xen-kmp used this since xen-3.0.4, instead the official protocol
457              * from xen-3.3+ It did an unconditional "outl(1, (ioaddr + 4));"
458              * Pre VMDP 1.7 used 4 and 8 depending on how VMDP was configured.
459              * If VMDP was to control both disk and LAN it would use 4.
460              * If it controlled just disk or just LAN, it would use 8 below.
461              */
462             pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
463             pci_unplug_nics(pci_get_bus(pci_dev));
464         }
465         break;
466     case 8:
467         switch (val) {
468         case 1:
469             pci_unplug_disks(pci_get_bus(pci_dev), UNPLUG_IDE_SCSI_DISKS);
470             break;
471         case 2:
472             pci_unplug_nics(pci_get_bus(pci_dev));
473             break;
474         default:
475             log_writeb(s, (uint32_t)val);
476             break;
477         }
478         break;
479     default:
480         break;
481     }
482 }
483 
484 static const MemoryRegionOps xen_pci_io_ops = {
485     .read  = xen_platform_ioport_readb,
486     .write = xen_platform_ioport_writeb,
487     .impl.min_access_size = 1,
488     .impl.max_access_size = 1,
489 };
490 
491 static void platform_ioport_bar_setup(PCIXenPlatformState *d)
492 {
493     memory_region_init_io(&d->bar, OBJECT(d), &xen_pci_io_ops, d,
494                           "xen-pci", 0x100);
495 }
496 
497 static uint64_t platform_mmio_read(void *opaque, hwaddr addr,
498                                    unsigned size)
499 {
500     DPRINTF("Warning: attempted read from physical address "
501             "0x" HWADDR_FMT_plx " in xen platform mmio space\n", addr);
502 
503     return 0;
504 }
505 
506 static void platform_mmio_write(void *opaque, hwaddr addr,
507                                 uint64_t val, unsigned size)
508 {
509     DPRINTF("Warning: attempted write of 0x%"PRIx64" to physical "
510             "address 0x" HWADDR_FMT_plx " in xen platform mmio space\n",
511             val, addr);
512 }
513 
514 static const MemoryRegionOps platform_mmio_handler = {
515     .read = &platform_mmio_read,
516     .write = &platform_mmio_write,
517     .endianness = DEVICE_NATIVE_ENDIAN,
518 };
519 
520 static void platform_mmio_setup(PCIXenPlatformState *d)
521 {
522     memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d,
523                           "xen-mmio", 0x1000000);
524 }
525 
526 static int xen_platform_post_load(void *opaque, int version_id)
527 {
528     PCIXenPlatformState *s = opaque;
529 
530     platform_fixed_ioport_writeb(s, 0, s->flags);
531 
532     return 0;
533 }
534 
535 static const VMStateDescription vmstate_xen_platform = {
536     .name = "platform",
537     .version_id = 4,
538     .minimum_version_id = 4,
539     .post_load = xen_platform_post_load,
540     .fields = (VMStateField[]) {
541         VMSTATE_PCI_DEVICE(parent_obj, PCIXenPlatformState),
542         VMSTATE_UINT8(flags, PCIXenPlatformState),
543         VMSTATE_END_OF_LIST()
544     }
545 };
546 
547 static void xen_platform_realize(PCIDevice *dev, Error **errp)
548 {
549     PCIXenPlatformState *d = XEN_PLATFORM(dev);
550     uint8_t *pci_conf;
551 
552     /* Device will crash on reset if xen is not initialized */
553     if (xen_mode == XEN_DISABLED) {
554         error_setg(errp, "xen-platform device requires a Xen guest");
555         return;
556     }
557 
558     pci_conf = dev->config;
559 
560     pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
561 
562     pci_config_set_prog_interface(pci_conf, 0);
563 
564     pci_conf[PCI_INTERRUPT_PIN] = 1;
565 
566     platform_ioport_bar_setup(d);
567     pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &d->bar);
568 
569     /* reserve 16MB mmio address for share memory*/
570     platform_mmio_setup(d);
571     pci_register_bar(dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH,
572                      &d->mmio_bar);
573 
574     platform_fixed_ioport_init(d);
575 }
576 
577 static void platform_reset(DeviceState *dev)
578 {
579     PCIXenPlatformState *s = XEN_PLATFORM(dev);
580 
581     platform_fixed_ioport_reset(s);
582 }
583 
584 static void xen_platform_class_init(ObjectClass *klass, void *data)
585 {
586     DeviceClass *dc = DEVICE_CLASS(klass);
587     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
588 
589     k->realize = xen_platform_realize;
590     k->vendor_id = PCI_VENDOR_ID_XEN;
591     k->device_id = PCI_DEVICE_ID_XEN_PLATFORM;
592     k->class_id = PCI_CLASS_OTHERS << 8 | 0x80;
593     k->subsystem_vendor_id = PCI_VENDOR_ID_XEN;
594     k->subsystem_id = PCI_DEVICE_ID_XEN_PLATFORM;
595     k->revision = 1;
596     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
597     dc->desc = "XEN platform pci device";
598     dc->reset = platform_reset;
599     dc->vmsd = &vmstate_xen_platform;
600 }
601 
602 static const TypeInfo xen_platform_info = {
603     .name          = TYPE_XEN_PLATFORM,
604     .parent        = TYPE_PCI_DEVICE,
605     .instance_size = sizeof(PCIXenPlatformState),
606     .class_init    = xen_platform_class_init,
607     .interfaces = (InterfaceInfo[]) {
608         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
609         { },
610     },
611 };
612 
613 static void xen_platform_register_types(void)
614 {
615     type_register_static(&xen_platform_info);
616 }
617 
618 type_init(xen_platform_register_types)
619